diff --git a/.gitignore b/.gitignore index ce18cfcde8751..9fbbe9cfc0dd3 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,8 @@ autoconf/autom4te.cache # VS2017 and VSCode config files. .vscode .vs +# pythonenv for github Codespaces +pythonenv* # clangd index. (".clangd" is a config file now, thus trailing slash) .clangd/ .cache diff --git a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp index 04dc61f02df1e..44ae380b63b2e 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp @@ -338,7 +338,7 @@ void UseAutoCheck::replaceIterators(const DeclStmt *D, ASTContext *Context) { // Drill down to the as-written initializer. const Expr *E = (*Construct->arg_begin())->IgnoreParenImpCasts(); - if (E != E->IgnoreConversionOperator()) { + if (E != E->IgnoreConversionOperatorSingleStep()) { // We hit a conversion operator. Early-out now as they imply an implicit // conversion from a different type. Could also mean an explicit // conversion from the same type but that's pretty rare. 
diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index 9dcb10b9d20c4..7e8ba4eb90c65 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -205,7 +205,7 @@ std::string compareExpressionToZero(const MatchFinder::MatchResult &Result, std::string replacementExpression(const MatchFinder::MatchResult &Result, bool Negated, const Expr *E) { - E = E->ignoreParenBaseCasts(); + E = E->IgnoreParenBaseCasts(); if (const auto *EC = dyn_cast(E)) E = EC->getSubExpr(); diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst index 8bc97f4114ae5..c2746914e754a 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst @@ -83,6 +83,8 @@ Known limitations The ``else`` branch is not checked currently for negated condition variable: +.. code-block:: c + bool onFire = isBurning(); if (onFire) { scream(); diff --git a/clang/docs/Block-ABI-Apple.rst b/clang/docs/Block-ABI-Apple.rst index d038cdfe9bd20..e21a8b68b5cd1 100644 --- a/clang/docs/Block-ABI-Apple.rst +++ b/clang/docs/Block-ABI-Apple.rst @@ -35,7 +35,8 @@ High Level ========== The ABI of ``Blocks`` consist of their layout and the runtime functions required -by the compiler. A ``Block`` consists of a structure of the following form: +by the compiler. A ``Block`` of type ``R (^)(P...)`` consists of a structure of +the following form: .. code-block:: c @@ -43,7 +44,7 @@ by the compiler. 
A ``Block`` consists of a structure of the following form: void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock int flags; int reserved; - void (*invoke)(void *, ...); + R (*invoke)(struct Block_literal_1 *, P...); struct Block_descriptor_1 { unsigned long int reserved; // NULL unsigned long int size; // sizeof(struct Block_literal_1) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index c35718b51248c..72a25032151ff 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -758,7 +758,24 @@ the configuration (without a prefix: ``Auto``). int bbbbbbbbbbbbbbbbbbbbb) { } +**AttributeMacros** (``std::vector``) + A vector of strings that should be interpreted as attributes/qualifiers + instead of identifiers. This can be useful for language extensions or + static analyzer annotations: + .. code-block:: c++ + + x = (char *__capability)&y; + int function(void) __ununsed; + void only_writes_to_buffer(char *__output buffer); + + In the .clang-format configuration file, this can be configured like: + + .. code-block:: yaml + + AttributeMacros: ['__capability', '__output', '__ununsed'] + + For example: __capability. **BinPackArguments** (``bool``) If ``false``, a function call's arguments will either be all on the diff --git a/clang/docs/ThreadSafetyAnalysis.rst b/clang/docs/ThreadSafetyAnalysis.rst index ea8e98a1884bf..e4a3342c02bd8 100644 --- a/clang/docs/ThreadSafetyAnalysis.rst +++ b/clang/docs/ThreadSafetyAnalysis.rst @@ -209,21 +209,21 @@ must be held on entry to the function, *and must still be held on exit*. } -ACQUIRE(...), ACQUIRE_SHARED(...), RELEASE(...), RELEASE_SHARED(...) --------------------------------------------------------------------- +ACQUIRE(...), ACQUIRE_SHARED(...), RELEASE(...), RELEASE_SHARED(...), RELEASE_GENERIC(...) 
+------------------------------------------------------------------------------------------ *Previously*: ``EXCLUSIVE_LOCK_FUNCTION``, ``SHARED_LOCK_FUNCTION``, ``UNLOCK_FUNCTION`` -``ACQUIRE`` is an attribute on functions or methods, which -declares that the function acquires a capability, but does not release it. The -caller must not hold the given capability on entry, and it will hold the -capability on exit. ``ACQUIRE_SHARED`` is similar. +``ACQUIRE`` and ``ACQUIRE_SHARED`` are attributes on functions or methods +declaring that the function acquires a capability, but does not release it. +The given capability must not be held on entry, and will be held on exit +(exclusively for ``ACQUIRE``, shared for ``ACQUIRE_SHARED``). -``RELEASE`` and ``RELEASE_SHARED`` declare that the function releases the given -capability. The caller must hold the capability on entry, and will no longer -hold it on exit. It does not matter whether the given capability is shared or -exclusive. +``RELEASE``, ``RELEASE_SHARED``, and ``RELEASE_GENERIC`` declare that the +function releases the given capability. The capability must be held on entry +(exclusively for ``RELEASE``, shared for ``RELEASE_SHARED``, exclusively or +shared for ``RELEASE_GENERIC``), and will no longer be held on exit. .. code-block:: c++ @@ -402,6 +402,13 @@ the destructor. Such classes require special handling because the constructor and destructor refer to the capability via different names; see the ``MutexLocker`` class in :ref:`mutexheader`, below. +Scoped capabilities are treated as capabilities that are implicitly acquired +on construction and released on destruction. They are associated with +the set of (regular) capabilities named in thread safety attributes on the +constructor. Acquire-type attributes on other member functions are treated as +applying to that set of associated capabilities, while ``RELEASE`` implies that +a function releases all associated capabilities in whatever mode they're held. 
+ TRY_ACQUIRE(, ...), TRY_ACQUIRE_SHARED(, ...) --------------------------------------------------------- @@ -414,6 +421,26 @@ The first argument must be ``true`` or ``false``, to specify which return value indicates success, and the remaining arguments are interpreted in the same way as ``ACQUIRE``. See :ref:`mutexheader`, below, for example uses. +Because the analysis doesn't support conditional locking, a capability is +treated as acquired after the first branch on the return value of a try-acquire +function. + +.. code-block:: c++ + + Mutex mu; + int a GUARDED_BY(mu); + + void foo() { + bool success = mu.TryLock(); + a = 0; // Warning, mu is not locked. + if (success) { + a = 0; // Ok. + mu.Unlock(); + } else { + a = 0; // Warning, mu is not locked. + } + } + ASSERT_CAPABILITY(...) and ASSERT_SHARED_CAPABILITY(...) -------------------------------------------------------- @@ -800,6 +827,9 @@ implementation. #define RELEASE_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) + #define RELEASE_GENERIC(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(release_generic_capability(__VA_ARGS__)) + #define TRY_ACQUIRE(...) \ THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) @@ -844,6 +874,9 @@ implementation. // Release/unlock a shared mutex. void ReaderUnlock() RELEASE_SHARED(); + // Generic unlock, can unlock exclusive and shared mutexes. + void GenericUnlock() RELEASE_GENERIC(); + // Try to acquire the mutex. Returns true on success, and false on failure. bool TryLock() TRY_ACQUIRE(true); @@ -860,19 +893,78 @@ implementation. const Mutex& operator!() const { return *this; } }; + // Tag types for selecting a constructor. + struct adopt_lock_t {} inline constexpr adopt_lock = {}; + struct defer_lock_t {} inline constexpr defer_lock = {}; + struct shared_lock_t {} inline constexpr shared_lock = {}; // MutexLocker is an RAII class that acquires a mutex in its constructor, and // releases it in its destructor. 
class SCOPED_CAPABILITY MutexLocker { private: Mutex* mut; + bool locked; public: - MutexLocker(Mutex *mu) ACQUIRE(mu) : mut(mu) { + // Acquire mu, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu) ACQUIRE(mu) : mut(mu), locked(true) { mu->Lock(); } + + // Assume mu is held, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, adopt_lock_t) REQUIRES(mu) : mut(mu), locked(true) {} + + // Acquire mu in shared mode, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, shared_lock_t) ACQUIRE_SHARED(mu) : mut(mu), locked(true) { + mu->ReaderLock(); + } + + // Assume mu is held in shared mode, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, adopt_lock_t, shared_lock_t) REQUIRES_SHARED(mu) + : mut(mu), locked(true) {} + + // Assume mu is not held, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, defer_lock_t) EXCLUDES(mu) : mut(mu), locked(false) {} + + // Release *this and all associated mutexes, if they are still held. + // There is no warning if the scope was already unlocked before. ~MutexLocker() RELEASE() { + if (locked) + mut->GenericUnlock(); + } + + // Acquire all associated mutexes exclusively. + void Lock() ACQUIRE() { + mut->Lock(); + locked = true; + } + + // Try to acquire all associated mutexes exclusively. + bool TryLock() TRY_ACQUIRE(true) { + return locked = mut->TryLock(); + } + + // Acquire all associated mutexes in shared mode. + void ReaderLock() ACQUIRE_SHARED() { + mut->ReaderLock(); + locked = true; + } + + // Try to acquire all associated mutexes in shared mode. + bool ReaderTryLock() TRY_ACQUIRE_SHARED(true) { + return locked = mut->ReaderTryLock(); + } + + // Release all associated mutexes. Warn on double unlock. + void Unlock() RELEASE() { mut->Unlock(); + locked = false; + } + + // Release all associated mutexes. Warn on double unlock. 
+ void ReaderUnlock() RELEASE() { + mut->ReaderUnlock(); + locked = false; } }; diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 3b378f735ebcc..7a294f916bcf9 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1747,7 +1747,7 @@ Check for integer to enumeration casts that could result in undefined values. void foo() { TestEnum t = static_cast(-1); // warn: the value provided to the cast expression is not in - the valid range of values for the enum + // the valid range of values for the enum .. _alpha-cplusplus-InvalidatedIterator: diff --git a/clang/examples/Attribute/CMakeLists.txt b/clang/examples/Attribute/CMakeLists.txt index ed02f5e5992f5..42f04f5039bc7 100644 --- a/clang/examples/Attribute/CMakeLists.txt +++ b/clang/examples/Attribute/CMakeLists.txt @@ -1,7 +1,7 @@ add_llvm_library(Attribute MODULE Attribute.cpp PLUGIN_TOOL clang) if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN)) - target_link_libraries(Attribute ${cmake_2_8_12_PRIVATE} + target_link_libraries(Attribute PRIVATE clangAST clangBasic clangFrontend diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index e0a998ac20a9c..bec6af48b941c 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2940,6 +2940,26 @@ CINDEX_LINKAGE int clang_getCursorPlatformAvailability( CINDEX_LINKAGE void clang_disposeCXPlatformAvailability(CXPlatformAvailability *availability); +/** + * If cursor refers to a variable declaration and it has initializer returns + * cursor referring to the initializer otherwise return null cursor. + */ +CINDEX_LINKAGE CXCursor clang_Cursor_getVarDeclInitializer(CXCursor cursor); + +/** + * If cursor refers to a variable declaration that has global storage returns 1. + * If cursor refers to a variable declaration that doesn't have global storage + * returns 0. Otherwise returns -1. 
+ */ +CINDEX_LINKAGE int clang_Cursor_hasVarDeclGlobalStorage(CXCursor cursor); + +/** + * If cursor refers to a variable declaration that has external storage + * returns 1. If cursor refers to a variable declaration that doesn't have + * external storage returns 0. Otherwise returns -1. + */ +CINDEX_LINKAGE int clang_Cursor_hasVarDeclExternalStorage(CXCursor cursor); + /** * Describe the "language" of the entity referred to by a cursor. */ diff --git a/clang/include/clang-c/Rewrite.h b/clang/include/clang-c/Rewrite.h new file mode 100644 index 0000000000000..ce1b05594b384 --- /dev/null +++ b/clang/include/clang-c/Rewrite.h @@ -0,0 +1,63 @@ +/*===-- clang-c/Rewrite.h - C CXRewriter --------------------------*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_CLANG_C_REWRITE_H +#define LLVM_CLANG_C_REWRITE_H + +#include "clang-c/CXString.h" +#include "clang-c/ExternC.h" +#include "clang-c/Index.h" +#include "clang-c/Platform.h" + +LLVM_CLANG_C_EXTERN_C_BEGIN + +typedef void *CXRewriter; + +/** + * Create CXRewriter. + */ +CINDEX_LINKAGE CXRewriter clang_CXRewriter_create(CXTranslationUnit TU); + +/** + * Insert the specified string at the specified location in the original buffer. + */ +CINDEX_LINKAGE void clang_CXRewriter_insertTextBefore(CXRewriter Rew, CXSourceLocation Loc, + const char *Insert); + +/** + * Replace the specified range of characters in the input with the specified + * replacement. + */ +CINDEX_LINKAGE void clang_CXRewriter_replaceText(CXRewriter Rew, CXSourceRange ToBeReplaced, + const char *Replacement); + +/** + * Remove the specified range. 
+ */ +CINDEX_LINKAGE void clang_CXRewriter_removeText(CXRewriter Rew, CXSourceRange ToBeRemoved); + +/** + * Save all changed files to disk. + * Returns 1 if any files were not saved successfully, returns 0 otherwise. + */ +CINDEX_LINKAGE int clang_CXRewriter_overwriteChangedFiles(CXRewriter Rew); + +/** + * Write out rewritten version of the main file to stdout. + */ +CINDEX_LINKAGE void clang_CXRewriter_writeMainFileToStdOut(CXRewriter Rew); + +/** + * Free the given CXRewriter. + */ +CINDEX_LINKAGE void clang_CXRewriter_dispose(CXRewriter Rew); + +LLVM_CLANG_C_EXTERN_C_END + +#endif diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h index 87e4bd7f84c11..5103cfa8604e5 100644 --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -304,7 +304,7 @@ class APValue { MakeComplexFloat(); setComplexFloat(std::move(R), std::move(I)); } APValue(const APValue &RHS); - APValue(APValue &&RHS) : Kind(None) { swap(RHS); } + APValue(APValue &&RHS); APValue(LValueBase B, const CharUnits &O, NoLValuePath N, bool IsNullPtr = false) : Kind(None) { @@ -339,6 +339,9 @@ class APValue { return Result; } + APValue &operator=(const APValue &RHS); + APValue &operator=(APValue &&RHS); + ~APValue() { if (Kind != None && Kind != Indeterminate) DestroyDataAndMakeUninit(); @@ -591,12 +594,6 @@ class APValue { ((AddrLabelDiffData*)(char*)Data.buffer)->RHSExpr = RHSExpr; } - /// Assign by swapping from a copy of the RHS. - APValue &operator=(APValue RHS) { - swap(RHS); - return *this; - } - private: void DestroyDataAndMakeUninit(); void MakeInt() { diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 4feb1d45251d5..9e22543761501 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -77,7 +77,7 @@ class TemplateParameterList final /// The number of template parameters in this template /// parameter list. 
- unsigned NumParams : 30; + unsigned NumParams : 29; /// Whether this template parameter list contains an unexpanded parameter /// pack. diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 5edca25937896..26e52ad367f81 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -867,9 +867,9 @@ class Expr : public ValueStmt { /// Skip conversion operators. If this Expr is a call to a conversion /// operator, return the argument. - Expr *IgnoreConversionOperator() LLVM_READONLY; - const Expr *IgnoreConversionOperator() const { - return const_cast(this)->IgnoreConversionOperator(); + Expr *IgnoreConversionOperatorSingleStep() LLVM_READONLY; + const Expr *IgnoreConversionOperatorSingleStep() const { + return const_cast(this)->IgnoreConversionOperatorSingleStep(); } /// Skip past any parentheses and lvalue casts which might surround this @@ -901,9 +901,9 @@ class Expr : public ValueStmt { /// * What IgnoreParens() skips /// * CastExpr which represent a derived-to-base cast (CK_DerivedToBase, /// CK_UncheckedDerivedToBase and CK_NoOp) - Expr *ignoreParenBaseCasts() LLVM_READONLY; - const Expr *ignoreParenBaseCasts() const { - return const_cast(this)->ignoreParenBaseCasts(); + Expr *IgnoreParenBaseCasts() LLVM_READONLY; + const Expr *IgnoreParenBaseCasts() const { + return const_cast(this)->IgnoreParenBaseCasts(); } /// Determine whether this expression is a default function argument. diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h new file mode 100644 index 0000000000000..15d31f3af9954 --- /dev/null +++ b/clang/include/clang/AST/IgnoreExpr.h @@ -0,0 +1,61 @@ +//===--- IgnoreExpr.h - Ignore intermediate Expressions -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common functions to ignore intermediate expression nodes +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_IGNOREEXPR_H +#define LLVM_CLANG_AST_IGNOREEXPR_H + +#include "clang/AST/Expr.h" + +namespace clang { +namespace detail { +/// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, +/// Return Fn_n(...(Fn_1(E))) +inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; }; +template +Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) { + return IgnoreExprNodesImpl(Fn(E), std::forward(Fns)...); +} +} // namespace detail + +/// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, +/// Recursively apply each of the functions to E until reaching a fixed point. +/// Note that a null E is valid; in this case nothing is done. +template Expr *IgnoreExprNodes(Expr *E, FnTys &&... 
Fns) { + Expr *LastE = nullptr; + while (E != LastE) { + LastE = E; + E = detail::IgnoreExprNodesImpl(E, std::forward(Fns)...); + } + return E; +} + +Expr *IgnoreImplicitCastsSingleStep(Expr *E); + +Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E); + +Expr *IgnoreCastsSingleStep(Expr *E); + +Expr *IgnoreLValueCastsSingleStep(Expr *E); + +Expr *IgnoreBaseCastsSingleStep(Expr *E); + +Expr *IgnoreImplicitSingleStep(Expr *E); + +Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E); + +Expr *IgnoreParensOnlySingleStep(Expr *E); + +Expr *IgnoreParensSingleStep(Expr *E); + +} // namespace clang + +#endif // LLVM_CLANG_AST_IGNOREEXPR_H diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index fac9245c7a3d5..f9bb41bf0635a 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5407,10 +5407,6 @@ to the SVE predicate type ``svbool_t``, this excludes tuple types such as ``N==__ARM_FEATURE_SVE_BITS``, the implementation defined feature macro that is enabled under the ``-msve-vector-bits`` flag. -NOTE: This feature is currently WIP, the ``-msve-vector-bits=`` flag defines -the ``__ARM_FEATURE_SVE_BITS_EXPERIMENTAL`` macro. This feature is complete -when experimental is dropped. - For more information See `Arm C Language Extensions for SVE `_ for more information. 
}]; diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index b9824588939b2..89dd03075b28f 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -100,6 +100,11 @@ BUILTIN(__builtin_altivec_vmulouh, "V4UiV8UsV8Us", "") BUILTIN(__builtin_altivec_vmulosh, "V4SiV8SsV8Ss", "") BUILTIN(__builtin_altivec_vmulouw, "V2ULLiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vmulosw, "V2SLLiV4SiV4Si", "") +BUILTIN(__builtin_altivec_vmuleud, "V1ULLLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vmulesd, "V1SLLLiV2SLLiV2SLLi", "") +BUILTIN(__builtin_altivec_vmuloud, "V1ULLLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vmulosd, "V1SLLLiV2SLLiV2SLLi", "") +BUILTIN(__builtin_altivec_vmsumcud, "V1ULLLiV2ULLiV2ULLiV1ULLLi", "") BUILTIN(__builtin_altivec_vnmsubfp, "V4fV4fV4fV4f", "") @@ -317,6 +322,13 @@ BUILTIN(__builtin_altivec_vmulhuw, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "") BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "") +// P10 Vector Expand with Mask built-ins. +BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "") +BUILTIN(__builtin_altivec_vexpandhm, "V8UsV8Us", "") +BUILTIN(__builtin_altivec_vexpandwm, "V4UiV4Ui", "") +BUILTIN(__builtin_altivec_vexpanddm, "V2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vexpandqm, "V1ULLLiV1ULLLi", "") + // P10 Vector Parallel Bits built-ins. 
BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 758dfbc1d283d..ec77f68062e7a 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -14,20 +14,17 @@ // //===----------------------------------------------------------------------===// #ifndef CODEGENOPT -#error Define the CODEGENOPT macro to handle language options +# error Define the CODEGENOPT macro to handle language options #endif #ifndef VALUE_CODEGENOPT -#define VALUE_CODEGENOPT(Name, Bits, Default) CODEGENOPT(Name, Bits, Default) +# define VALUE_CODEGENOPT(Name, Bits, Default) \ +CODEGENOPT(Name, Bits, Default) #endif #ifndef ENUM_CODEGENOPT -#define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ - CODEGENOPT(Name, Bits, Default) -#endif - -#ifndef TYPED_CODEGENOPT -#define TYPED_CODEGENOPT(Type, Name, Description) +# define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ +CODEGENOPT(Name, Bits, Default) #endif CODEGENOPT(DisableIntegratedAS, 1, 0) ///< -no-integrated-as @@ -148,7 +145,7 @@ CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< linker. CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants. CODEGENOPT(MergeFunctions , 1, 0) ///< Set when -fmerge-functions is enabled. -CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemprof is enabled. +CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemory-profile is enabled. CODEGENOPT(MSVolatile , 1, 0) ///< Set when /volatile:ms is enabled. CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled. 
CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is @@ -398,226 +395,6 @@ CODEGENOPT(KeepStaticConsts, 1, 0) /// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) -TYPED_CODEGENOPT( - std::string, BBSections, - "This field stores one of the allowed values for the option " - "-fbasic-block-sections=. The allowed values with this option are: " - "{\"labels\", \"all\", \"list=\", \"none\"}. \"labels\": Only " - "generate basic block symbols (labels) for all basic blocks, do not " - "generate unique sections for basic blocks. Use the machine basic block id " - "in the symbol name to associate profile info from virtual address to " - "machine basic block. \"all\" : Generate basic block sections for " - "all basic blocks. \"list=\": Generate basic block sections for a " - "subset of basic blocks. The functions and the machine basic block ids are " - "specified in the file. \"none\": Disable sections/labels for basic " - "blocks.") - -TYPED_CODEGENOPT(std::string, CodeModel, "The code model to use (-mcmodel).") - -TYPED_CODEGENOPT(std::string, CoverageDataFile, - "The filename with path we use for coverage data files. The " - "runtime allows further manipulation with the GCOV_PREFIX and " - "GCOV_PREFIX_STRIP environment variables. 
The filename with " - "path we use for coverage notes files.") -TYPED_CODEGENOPT(std::string, CoverageNotesFile, "") - -TYPED_CODEGENOPT( - std::string, ProfileFilterFiles, - "Regexes separated by a semi-colon to filter the files to instrument.") - -TYPED_CODEGENOPT( - std::string, ProfileExcludeFiles, - "Regexes separated by a semi-colon to filter the files to not instrument.") - -TYPED_CODEGENOPT(CoverageVersionTy, CoverageVersion, - "The version string to put into coverage files.") - -TYPED_CODEGENOPT(std::string, DebugPass, - "Enable additional debugging information.") - -TYPED_CODEGENOPT(std::string, DebugCompilationDir, - "The string to embed in debug information as the current " - "working directory.") - -TYPED_CODEGENOPT(std::string, DwarfDebugFlags, - "The string to embed in the debug information for the compile " - "unit, if non-empty.") - -TYPED_CODEGENOPT(std::string, RecordCommandLine, - "The string containing the commandline for the " - "llvm.commandline metadata, if non-empty.") - -TYPED_CODEGENOPT(DebugPrefixMapTy, DebugPrefixMap, "") - -TYPED_CODEGENOPT(std::string, FloatABI, - "The ABI to use for passing floating point arguments.") - -TYPED_CODEGENOPT(llvm::DenormalMode, FPDenormalMode, - "The floating-point denormal mode to use.") - -TYPED_CODEGENOPT(llvm::DenormalMode, FP32DenormalMode, - "The floating-point denormal mode to use, for float.") - -TYPED_CODEGENOPT(std::string, LimitFloatPrecision, - "The float precision limit to use, if non-empty.") - -TYPED_CODEGENOPT(std::vector, LinkBitcodeFiles, - "The files specified here are linked in to the module before " - "optimizations.") - -TYPED_CODEGENOPT( - std::string, MainFileName, - "The user provided name for the \"main file\", if non-empty. 
This is " - "useful in situations where the input file name does not match the " - "original input file, for example with -save-temps.") - -TYPED_CODEGENOPT(std::string, SplitDwarfFile, - "The name for the split debug info file used for the " - "DW_AT_[GNU_]dwo_name attribute in the skeleton CU.") - -TYPED_CODEGENOPT( - std::string, SplitDwarfOutput, - "Output filename for the split debug info, not used in the skeleton CU.") - -TYPED_CODEGENOPT(llvm::Reloc::Model, RelocationModel, - "The name of the relocation model to use.") - -TYPED_CODEGENOPT(std::string, ThreadModel, "The thread model to use") - -TYPED_CODEGENOPT(std::string, TrapFuncName, - "If not an empty string, trap intrinsics are lowered to calls " - "to this function instead of to trap instructions.") - -TYPED_CODEGENOPT(std::vector, DependentLibraries, - "A list of dependent libraries.") - -TYPED_CODEGENOPT(std::vector, LinkerOptions, - "A list of linker options to embed in the object file.") - -TYPED_CODEGENOPT( - std::string, InstrProfileOutput, - "Name of the profile file to use as output for -fprofile-instr-generate, " - "-fprofile-generate, and -fcs-profile-generate.") - -TYPED_CODEGENOPT(std::string, SampleProfileFile, - "Name of the profile file to use with -fprofile-sample-use.") - -TYPED_CODEGENOPT( - std::string, ProfileInstrumentUsePath, - "Name of the profile file to use as input for -fprofile-instr-use") - -TYPED_CODEGENOPT( - std::string, ProfileRemappingFile, - "Name of the profile remapping file to apply to the profile data supplied " - "by -fprofile-sample-use or -fprofile-instr-use.") - -TYPED_CODEGENOPT(std::string, ThinLTOIndexFile, - "Name of the function summary index file to use for ThinLTO " - "function importing.") - -TYPED_CODEGENOPT( - std::string, ThinLinkBitcodeFile, - "Name of a file that can optionally be written with minimized bitcode to " - "be used as input for the ThinLTO thin link step, which only needs the " - "summary and module symbol table (and not, e.g. 
any debug metadata).") - -TYPED_CODEGENOPT(std::string, SaveTempsFilePrefix, - "Prefix to use for -save-temps output.") - -TYPED_CODEGENOPT( - std::string, CudaGpuBinaryFileName, - "Name of file passed with -fcuda-include-gpubinary option to forward to " - "CUDA runtime back-end for incorporating them into host-side object file.") - -TYPED_CODEGENOPT(std::string, OptRecordFile, - "The name of the file to which the backend should save YAML " - "optimization records.") - -TYPED_CODEGENOPT(std::string, OptRecordPasses, - "The regex that filters the passes that should be saved to " - "the optimization records.") - -TYPED_CODEGENOPT(std::string, OptRecordFormat, - "The format used for serializing remarks (default: YAML)") - -TYPED_CODEGENOPT( - std::string, SymbolPartition, - "The name of the partition that symbols are assigned to, specified with " - "-fsymbol-partition (see https://lld.llvm.org/Partitions.html).") - -TYPED_CODEGENOPT( - std::shared_ptr, OptimizationRemarkPattern, - "Regular expression to select optimizations for which we should enable " - "optimization remarks. Transformation passes whose name matches this " - "expression (and support this feature), will emit a diagnostic whenever " - "they perform a transformation. This is enabled by the -Rpass=regexp flag.") - -TYPED_CODEGENOPT( - std::shared_ptr, OptimizationRemarkMissedPattern, - "Regular expression to select optimizations for which we should enable " - "missed optimization remarks. Transformation passes whose name matches " - "this expression (and support this feature), will emit a diagnostic " - "whenever they tried but failed to perform a transformation. This is " - "enabled by the -Rpass-missed=regexp flag.") - -TYPED_CODEGENOPT( - std::shared_ptr, OptimizationRemarkAnalysisPattern, - "Regular expression to select optimizations for which we should enable " - "optimization analyses. 
Transformation passes whose name matches this " - "expression (and support this feature), will emit a diagnostic whenever " - "they want to explain why they decided to apply or not apply a given " - "transformation. This is enabled by the -Rpass-analysis=regexp flag.") - -TYPED_CODEGENOPT(std::vector, RewriteMapFiles, - "Set of files defining the rules for the symbol rewriting.") - -TYPED_CODEGENOPT(SanitizerSet, SanitizeRecover, - "Set of sanitizer checks that are non-fatal (i.e. execution " - "should be continued when possible).") - -TYPED_CODEGENOPT(SanitizerSet, SanitizeTrap, - "Set of sanitizer checks that trap rather than diagnose.") - -TYPED_CODEGENOPT(std::vector, CmdArgs, - "List of backend command-line options for -fembed-bitcode.") - -TYPED_CODEGENOPT(std::vector, NoBuiltinFuncs, - "A list of all -fno-builtin-* function names (e.g., memset).") - -TYPED_CODEGENOPT(std::vector, Reciprocals, "") - -TYPED_CODEGENOPT(std::string, PreferVectorWidth, - "The preferred width for auto-vectorization transforms. This " - "is intended to override default transforms based on the " - "width of the architected vector registers.") - -TYPED_CODEGENOPT(XRayInstrSet, XRayInstrumentationBundle, - "Set of XRay instrumentation kinds to emit.") - -TYPED_CODEGENOPT(std::vector, DefaultFunctionAttrs, "") - -TYPED_CODEGENOPT( - std::vector, PassPlugins, - "List of dynamic shared object files to be loaded as pass plugins.") - -TYPED_CODEGENOPT( - std::vector, SanitizeCoverageAllowlistFiles, - "Path to allowlist file specifying which objects (files, functions) should " - "exclusively be instrumented by sanitizer coverage pass.") - -TYPED_CODEGENOPT(std::vector, SanitizeCoverageBlocklistFiles, - "Path to blocklist file specifying which objects (files, " - "functions) listed for instrumentation by sanitizer coverage " - "pass should actually not be instrumented.") - -TYPED_CODEGENOPT( - const char *, Argv0, - "Executable and command-line used to create a given CompilerInvocation. 
" - "Most of the time this will be the full -cc1 command.") - -TYPED_CODEGENOPT(ArrayRef, CommandLineArgs, "") - #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT -#undef TYPED_CODEGENOPT diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 093f4014ae8c3..ca391bf8f1861 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -110,15 +110,75 @@ class CodeGenOptions : public CodeGenOptionsBase { Embed_Marker // Embed a marker as a placeholder for bitcode. }; + // This field stores one of the allowed values for the option + // -fbasic-block-sections=. The allowed values with this option are: + // {"labels", "all", "list=", "none"}. + // + // "labels": Only generate basic block symbols (labels) for all basic + // blocks, do not generate unique sections for basic blocks. + // Use the machine basic block id in the symbol name to + // associate profile info from virtual address to machine + // basic block. + // "all" : Generate basic block sections for all basic blocks. + // "list=": Generate basic block sections for a subset of basic blocks. + // The functions and the machine basic block ids are specified + // in the file. + // "none": Disable sections/labels for basic blocks. + std::string BBSections; + enum class FramePointerKind { None, // Omit all frame pointers. NonLeaf, // Keep non-leaf frame pointers. All, // Keep all frame pointers. }; - using DebugPrefixMapTy = std::map; + /// The code model to use (-mcmodel). + std::string CodeModel; + + /// The filename with path we use for coverage data files. The runtime + /// allows further manipulation with the GCOV_PREFIX and GCOV_PREFIX_STRIP + /// environment variables. + std::string CoverageDataFile; + + /// The filename with path we use for coverage notes files. + std::string CoverageNotesFile; + + /// Regexes separated by a semi-colon to filter the files to instrument. 
+ std::string ProfileFilterFiles; + + /// Regexes separated by a semi-colon to filter the files to not instrument. + std::string ProfileExcludeFiles; + + /// The version string to put into coverage files. + char CoverageVersion[4]; + + /// Enable additional debugging information. + std::string DebugPass; + + /// The string to embed in debug information as the current working directory. + std::string DebugCompilationDir; + + /// The string to embed in the debug information for the compile unit, if + /// non-empty. + std::string DwarfDebugFlags; + + /// The string containing the commandline for the llvm.commandline metadata, + /// if non-empty. + std::string RecordCommandLine; + + std::map DebugPrefixMap; + + /// The ABI to use for passing floating point arguments. + std::string FloatABI; + + /// The floating-point denormal mode to use. + llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE(); - using CoverageVersionTy = char[4]; + /// The floating-point denormal mode to use, for float. + llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE(); + + /// The float precision limit to use, if non-empty. + std::string LimitFloatPrecision; struct BitcodeFileToLink { /// The filename of the bitcode file to link in. @@ -133,14 +193,156 @@ class CodeGenOptions : public CodeGenOptionsBase { unsigned LinkFlags = 0; }; + /// The files specified here are linked in to the module before optimizations. + std::vector LinkBitcodeFiles; + + /// The user provided name for the "main file", if non-empty. This is useful + /// in situations where the input file name does not match the original input + /// file, for example with -save-temps. + std::string MainFileName; + + /// The name for the split debug info file used for the DW_AT_[GNU_]dwo_name + /// attribute in the skeleton CU. + std::string SplitDwarfFile; + + /// Output filename for the split debug info, not used in the skeleton CU. 
+ std::string SplitDwarfOutput; + + /// The name of the relocation model to use. + llvm::Reloc::Model RelocationModel; + + /// The thread model to use + std::string ThreadModel; + + /// If not an empty string, trap intrinsics are lowered to calls to this + /// function instead of to trap instructions. + std::string TrapFuncName; + + /// A list of dependent libraries. + std::vector DependentLibraries; + + /// A list of linker options to embed in the object file. + std::vector LinkerOptions; + + /// Name of the profile file to use as output for -fprofile-instr-generate, + /// -fprofile-generate, and -fcs-profile-generate. + std::string InstrProfileOutput; + + /// Name of the profile file to use with -fprofile-sample-use. + std::string SampleProfileFile; + + /// Name of the profile file to use as input for -fprofile-instr-use + std::string ProfileInstrumentUsePath; + + /// Name of the profile remapping file to apply to the profile data supplied + /// by -fprofile-sample-use or -fprofile-instr-use. + std::string ProfileRemappingFile; + + /// Name of the function summary index file to use for ThinLTO function + /// importing. + std::string ThinLTOIndexFile; + + /// Name of a file that can optionally be written with minimized bitcode + /// to be used as input for the ThinLTO thin link step, which only needs + /// the summary and module symbol table (and not, e.g. any debug metadata). + std::string ThinLinkBitcodeFile; + + /// Prefix to use for -save-temps output. + std::string SaveTempsFilePrefix; + + /// Name of file passed with -fcuda-include-gpubinary option to forward to + /// CUDA runtime back-end for incorporating them into host-side object file. + std::string CudaGpuBinaryFileName; + + /// The name of the file to which the backend should save YAML optimization + /// records. + std::string OptRecordFile; + + /// The regex that filters the passes that should be saved to the optimization + /// records. 
+ std::string OptRecordPasses; + + /// The format used for serializing remarks (default: YAML) + std::string OptRecordFormat; + + /// The name of the partition that symbols are assigned to, specified with + /// -fsymbol-partition (see https://lld.llvm.org/Partitions.html). + std::string SymbolPartition; + + /// Regular expression to select optimizations for which we should enable + /// optimization remarks. Transformation passes whose name matches this + /// expression (and support this feature), will emit a diagnostic + /// whenever they perform a transformation. This is enabled by the + /// -Rpass=regexp flag. + std::shared_ptr OptimizationRemarkPattern; + + /// Regular expression to select optimizations for which we should enable + /// missed optimization remarks. Transformation passes whose name matches this + /// expression (and support this feature), will emit a diagnostic + /// whenever they tried but failed to perform a transformation. This is + /// enabled by the -Rpass-missed=regexp flag. + std::shared_ptr OptimizationRemarkMissedPattern; + + /// Regular expression to select optimizations for which we should enable + /// optimization analyses. Transformation passes whose name matches this + /// expression (and support this feature), will emit a diagnostic + /// whenever they want to explain why they decided to apply or not apply + /// a given transformation. This is enabled by the -Rpass-analysis=regexp + /// flag. + std::shared_ptr OptimizationRemarkAnalysisPattern; + + /// Set of files defining the rules for the symbol rewriting. + std::vector RewriteMapFiles; + + /// Set of sanitizer checks that are non-fatal (i.e. execution should be + /// continued when possible). + SanitizerSet SanitizeRecover; + + /// Set of sanitizer checks that trap rather than diagnose. + SanitizerSet SanitizeTrap; + + /// List of backend command-line options for -fembed-bitcode. + std::vector CmdArgs; + + /// A list of all -fno-builtin-* function names (e.g., memset). 
+ std::vector NoBuiltinFuncs; + + std::vector Reciprocals; + + /// The preferred width for auto-vectorization transforms. This is intended to + /// override default transforms based on the width of the architected vector + /// registers. + std::string PreferVectorWidth; + + /// Set of XRay instrumentation kinds to emit. + XRayInstrSet XRayInstrumentationBundle; + + std::vector DefaultFunctionAttrs; + + /// List of dynamic shared object files to be loaded as pass plugins. + std::vector PassPlugins; + + /// Path to allowlist file specifying which objects + /// (files, functions) should exclusively be instrumented + /// by sanitizer coverage pass. + std::vector SanitizeCoverageAllowlistFiles; + + /// Path to blocklist file specifying which objects + /// (files, functions) listed for instrumentation by sanitizer + /// coverage pass should actually not be instrumented. + std::vector SanitizeCoverageBlocklistFiles; + + /// Executable and command-line used to create a given CompilerInvocation. + /// Most of the time this will be the full -cc1 command. + const char *Argv0 = nullptr; + ArrayRef CommandLineArgs; public: // Define accessors/mutators for code generation options of enumeration type. 
#define CODEGENOPT(Name, Bits, Default) -#define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ - Type get##Name() const { return static_cast(Name); } \ +#define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } -#define TYPED_CODEGENOPT(Type, Name, Description) Type Name; #include "clang/Basic/CodeGenOptions.def" CodeGenOptions(); diff --git a/clang/include/clang/Basic/CommentOptions.def b/clang/include/clang/Basic/CommentOptions.def deleted file mode 100644 index 537f9eb34bd43..0000000000000 --- a/clang/include/clang/Basic/CommentOptions.def +++ /dev/null @@ -1,26 +0,0 @@ -//===--- CommentOptions.def - Comment option database -------------*- C++ -//-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the comment options. Users of this file must -// define the TYPED_COMMENTOPT macro to make use of this information. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_COMMENTOPT -#define TYPED_COMMENTOPT(Type, Name, Description) -#endif - -TYPED_COMMENTOPT(BlockCommandNamesTy, BlockCommandNames, - "Command names to treat as vlock commands in comments. 
Should " - "not include the leading backslash.") - -TYPED_COMMENTOPT(bool, ParseAllComments, - "Treat ordinary comments as documentation comments") - -#undef TYPED_COMMENTOPT diff --git a/clang/include/clang/Basic/CommentOptions.h b/clang/include/clang/Basic/CommentOptions.h index 149650e6192a4..7d142fc32f511 100644 --- a/clang/include/clang/Basic/CommentOptions.h +++ b/clang/include/clang/Basic/CommentOptions.h @@ -23,10 +23,14 @@ namespace clang { struct CommentOptions { using BlockCommandNamesTy = std::vector; -#define TYPED_COMMENTOPT(Type, Name, Description) Type Name; -#include "clang/Basic/CommentOptions.def" + /// Command names to treat as block commands in comments. + /// Should not include the leading backslash. + BlockCommandNamesTy BlockCommandNames; - CommentOptions() : ParseAllComments(false) {} + /// Treat ordinary comments as documentation comments. + bool ParseAllComments = false; + + CommentOptions() = default; }; } // namespace clang diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 9be75f3751198..6a9ff309e49cb 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -298,6 +298,8 @@ def note_constexpr_bit_cast_invalid_subtype : Note< def note_constexpr_bit_cast_indet_dest : Note< "indeterminate value can only initialize an object of type 'unsigned char'" "%select{, 'char',|}1 or 'std::byte'; %0 is invalid">; +def note_constexpr_bit_cast_unrepresentable_value : Note< + "value %1 cannot be represented in type %0">; def note_constexpr_pseudo_destructor : Note< "pseudo-destructor call is not permitted in constant expressions " "until C++20">; diff --git a/clang/include/clang/Basic/DiagnosticOptions.def b/clang/include/clang/Basic/DiagnosticOptions.def index 35b01b8c5ce04..a946b5c6be8ef 100644 --- a/clang/include/clang/Basic/DiagnosticOptions.def +++ b/clang/include/clang/Basic/DiagnosticOptions.def @@ -43,10 +43,6 @@ 
DIAGOPT(Name, Bits, Default) ENUM_DIAGOPT(Name, Type, Bits, Default) #endif -#ifndef TYPED_DIAGOPT -#define TYPED_DIAGOPT(Type, Name, Description) -#endif - SEMANTIC_DIAGOPT(IgnoreWarnings, 1, 0) /// -w DIAGOPT(NoRewriteMacros, 1, 0) /// -Wno-rewrite-macros DIAGOPT(Pedantic, 1, 0) /// -pedantic @@ -99,32 +95,9 @@ VALUE_DIAGOPT(TabStop, 32, DefaultTabStop) /// The distance between tab stops. /// Column limit for formatting message diagnostics, or 0 if unused. VALUE_DIAGOPT(MessageLength, 32, 0) -TYPED_DIAGOPT(std::string, DiagnosticLogFile, - "The file to log diagnostic output to.") - -TYPED_DIAGOPT(std::string, DiagnosticSerializationFile, - "The file to serialize diagnostics to (non-appending).") - -TYPED_DIAGOPT(std::vector, Warnings, - "The list of -W... options used to alter the diagnostic " - "mappings, with the prefixes removed.") - -TYPED_DIAGOPT(std::vector, UndefPrefixes, - "The list of prefixes from -Wundef-prefix=... used to generate " - "warnings for undefined macros.") - -TYPED_DIAGOPT(std::vector, Remarks, - "The list of -R... options used to alter the diagnostic " - "mappings, with the prefixes removed.") - -TYPED_DIAGOPT(std::vector, VerifyPrefixes, - "The prefixes for comment directives sought by -verify " - "(\"expected\" by /// default).") - #undef DIAGOPT #undef ENUM_DIAGOPT #undef VALUE_DIAGOPT #undef SEMANTIC_DIAGOPT #undef SEMANTIC_ENUM_DIAGOPT #undef SEMANTIC_VALUE_DIAGOPT -#undef TYPED_DIAGOPT diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h index 2b6bd1fd2be57..7fbe534c5994b 100644 --- a/clang/include/clang/Basic/DiagnosticOptions.h +++ b/clang/include/clang/Basic/DiagnosticOptions.h @@ -88,9 +88,31 @@ class DiagnosticOptions : public RefCountedBase{ #include "clang/Basic/DiagnosticOptions.def" public: -#define TYPED_DIAGOPT(Type, Name, Description) Type Name; + /// The file to log diagnostic output to. 
+ std::string DiagnosticLogFile; + + /// The file to serialize diagnostics to (non-appending). + std::string DiagnosticSerializationFile; + + /// The list of -W... options used to alter the diagnostic mappings, with the + /// prefixes removed. + std::vector Warnings; + + /// The list of prefixes from -Wundef-prefix=... used to generate warnings + /// for undefined macros. + std::vector UndefPrefixes; + + /// The list of -R... options used to alter the diagnostic mappings, with the + /// prefixes removed. + std::vector Remarks; + + /// The prefixes for comment directives sought by -verify ("expected" by + /// default). + std::vector VerifyPrefixes; + +public: + // Define accessors/mutators for diagnostic options of enumeration type. #define DIAGOPT(Name, Bits, Default) -// Define accessors/mutators for diagnostic options of enumeration type. #define ENUM_DIAGOPT(Name, Type, Bits, Default) \ Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index c7b3031e0644e..57e6f398f3507 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1136,6 +1136,12 @@ def ext_stdc_pragma_ignored : ExtWarn<"unknown pragma in STDC namespace">, def warn_stdc_fenv_access_not_supported : Warning<"pragma STDC FENV_ACCESS ON is not supported, ignoring pragma">, InGroup; +def warn_stdc_fenv_round_not_supported : + Warning<"pragma STDC FENV_ROUND is not supported">, + InGroup; +def warn_stdc_unknown_rounding_mode : Warning< + "invalid or unsupported rounding mode in '#pragma STDC FENV_ROUND' - ignored">, + InGroup; // - #pragma comment def err_pragma_comment_malformed : Error< "pragma comment requires parenthesized identifier and optional string">; diff --git a/clang/include/clang/Basic/FileSystemOptions.def 
b/clang/include/clang/Basic/FileSystemOptions.def deleted file mode 100644 index 794e9871998e7..0000000000000 --- a/clang/include/clang/Basic/FileSystemOptions.def +++ /dev/null @@ -1,21 +0,0 @@ -//===--- FileSystemOptions.def - FileSystem option database -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the FileSystem options. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_FILESYSTEMOPT -#error define TYPED_FILESYSTEMOPT macro to hand filesystem options -#endif - -TYPED_FILESYSTEMOPT(std::string, WorkingDir, - "If set, paths are resolved as if the working directory was set " - "to the value of WorkingDir.") - -#undef TYPED_FILESYSTEMOPT diff --git a/clang/include/clang/Basic/FileSystemOptions.h b/clang/include/clang/Basic/FileSystemOptions.h index 4fd0851145a2b..458af0c7b6592 100644 --- a/clang/include/clang/Basic/FileSystemOptions.h +++ b/clang/include/clang/Basic/FileSystemOptions.h @@ -21,8 +21,9 @@ namespace clang { /// Keeps track of options that affect how file operations are performed. class FileSystemOptions { public: -#define TYPED_FILESYSTEMOPT(Type, Name, Description) Type Name; -#include "clang/Basic/FileSystemOptions.def" + /// If set, paths are resolved as if the working directory was + /// set to the value of WorkingDir. 
+ std::string WorkingDir; }; } // end namespace clang diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index ff431726e91d4..a54aab241df7f 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -78,10 +78,6 @@ COMPATIBLE_VALUE_LANGOPT(Name, Bits, Default, Description) #endif -#ifndef TYPED_LANGOPT -#define TYPED_LANGOPT(Type, Name, Descritpion) -#endif - // FIXME: A lot of the BENIGN_ options should be COMPATIBLE_ instead. LANGOPT(C99 , 1, 0, "C99") LANGOPT(C11 , 1, 0, "C11") @@ -244,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") -LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP") +LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP") LANGOPT(SYCL , 1, 0, "SYCL") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") @@ -277,6 +273,9 @@ BENIGN_LANGOPT(DumpRecordLayoutsSimple , 1, 0, "dumping the layout of IRgen'd re BENIGN_LANGOPT(DumpVTableLayouts , 1, 0, "dumping the layouts of emitted vtables") LANGOPT(NoConstantCFStrings , 1, 0, "no constant CoreFoundation strings") BENIGN_LANGOPT(InlineVisibilityHidden , 1, 0, "hidden visibility for inline C++ methods") +BENIGN_LANGOPT(VisibilityInlinesHiddenStaticLocalVar, 1, 0, + "hidden visibility for static local variables in inline C++ " + "methods when -fvisibility-inlines hidden is enabled") LANGOPT(GlobalAllocationFunctionVisibilityHidden , 1, 0, "hidden visibility for global operator new and delete declaration") BENIGN_LANGOPT(ParseUnknownAnytype, 1, 0, "__unknown_anytype") BENIGN_LANGOPT(DebuggerSupport , 1, 
0, "debugger support") @@ -400,73 +399,6 @@ LANGOPT(RelativeCXXABIVTables, 1, 0, LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits") -TYPED_LANGOPT(SanitizerSet, Sanitize, "Set of enabled sanitizers.") - -TYPED_LANGOPT(std::vector, SanitizerBlacklistFiles, - "Paths to blacklist files specifying which objects (files, " - "functions, variables) should not be instrumented.") - -TYPED_LANGOPT(std::vector, XRayAlwaysInstrumentFiles, - "Paths to the XRay \"always instrument\" files specifying which " - "objects (files, functions, variables) should be imbued with the " - "XRay \"always instrument\" attribute. WARNING: This is a " - "deprecated field and will go away in the future.") - -TYPED_LANGOPT(std::vector, XRayNeverInstrumentFiles, - "Paths to the XRay \"never instrument\" files specifying which " - "objects (files, functions, variables) should be imbued with the " - "XRay \"never instrument\" attribute. WARNING: This is a " - "deprecated field and will go away in the future.") - -TYPED_LANGOPT(std::vector, XRayAttrListFiles, - "Paths to the XRay attribute list files, specifying which " - "objects (files, functions, variables) should be imbued with the " - "appropriate XRay attribute(s).") - -TYPED_LANGOPT(clang::ObjCRuntime, ObjCRuntime, "") - -TYPED_LANGOPT(CoreFoundationABI, CFRuntime, "") - -TYPED_LANGOPT(std::string, ObjCConstantStringClass, "") - -TYPED_LANGOPT( - std::string, OverflowHandler, - "The name of the handler function to be called when -ftrapv is specified. " - "If none is specified, abort (GCC-compatible behaviour).") - -TYPED_LANGOPT( - std::string, ModuleName, - "The module currently being compiled as specified by -fmodule-name.") - -TYPED_LANGOPT( - std::string, CurrentModule, - "The name of the current module, of which the main source file is a part. " - "If CompilingModule is set, we are compiling the interface of this module, " - "otherwise we are compiling an implementation file of it. 
This starts as " - "ModuleName in case -fmodule-name is provided and changes during " - "compilation to reflect the current module.") - -TYPED_LANGOPT(std::vector, ModuleFeatures, - "The names of any features to enable in module 'requires' decls " - "in addition to the hard-coded list in Module.cpp and the target " - "features. This list is sorted.") - -TYPED_LANGOPT(std::vector, NoBuiltinFuncs, - "A list of all -fno-builtin-* function names (e.g., memset).") - -TYPED_LANGOPT( - std::vector, OMPTargetTriples, - "Triples of the OpenMP targets that the host code codegen should take into " - "account in order to generate accurate offloading descriptors.") - -TYPED_LANGOPT(std::string, OMPHostIRFile, - "Name of the IR file that contains the result of the OpenMP " - "target host code generation.") - -TYPED_LANGOPT(bool, IsHeaderFile, - "Indicates whether the front-end is explicitly told that the " - "input is a header file (i.e. -x c-header).") - #undef LANGOPT #undef COMPATIBLE_LANGOPT #undef BENIGN_LANGOPT @@ -476,4 +408,3 @@ TYPED_LANGOPT(bool, IsHeaderFile, #undef VALUE_LANGOPT #undef COMPATIBLE_VALUE_LANGOPT #undef BENIGN_VALUE_LANGOPT -#undef TYPED_LANGOPT diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 110943fe63c03..9d51eb7ba597d 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -232,12 +232,75 @@ class LangOptions : public LangOptionsBase { }; public: + /// Set of enabled sanitizers. + SanitizerSet Sanitize; + + /// Paths to blacklist files specifying which objects + /// (files, functions, variables) should not be instrumented. + std::vector SanitizerBlacklistFiles; + + /// Paths to the XRay "always instrument" files specifying which + /// objects (files, functions, variables) should be imbued with the XRay + /// "always instrument" attribute. + /// WARNING: This is a deprecated field and will go away in the future. 
+ std::vector XRayAlwaysInstrumentFiles; + + /// Paths to the XRay "never instrument" files specifying which + /// objects (files, functions, variables) should be imbued with the XRay + /// "never instrument" attribute. + /// WARNING: This is a deprecated field and will go away in the future. + std::vector XRayNeverInstrumentFiles; + + /// Paths to the XRay attribute list files, specifying which objects + /// (files, functions, variables) should be imbued with the appropriate XRay + /// attribute(s). + std::vector XRayAttrListFiles; + + clang::ObjCRuntime ObjCRuntime; + + CoreFoundationABI CFRuntime = CoreFoundationABI::Unspecified; + + std::string ObjCConstantStringClass; + + /// The name of the handler function to be called when -ftrapv is + /// specified. + /// + /// If none is specified, abort (GCC-compatible behaviour). + std::string OverflowHandler; + + /// The module currently being compiled as specified by -fmodule-name. + std::string ModuleName; + + /// The name of the current module, of which the main source file + /// is a part. If CompilingModule is set, we are compiling the interface + /// of this module, otherwise we are compiling an implementation file of + /// it. This starts as ModuleName in case -fmodule-name is provided and + /// changes during compilation to reflect the current module. + std::string CurrentModule; + + /// The names of any features to enable in module 'requires' decls + /// in addition to the hard-coded list in Module.cpp and the target features. + /// + /// This list is sorted. + std::vector ModuleFeatures; + /// Options for parsing comments. CommentOptions CommentOpts; -#define LANGOPT(Name, Bits, Default, Description) -#define TYPED_LANGOPT(Type, Name, Description) Type Name; -#include "clang/Basic/LangOptions.def" + /// A list of all -fno-builtin-* function names (e.g., memset). 
+ std::vector NoBuiltinFuncs; + + /// Triples of the OpenMP targets that the host code codegen should + /// take into account in order to generate accurate offloading descriptors. + std::vector OMPTargetTriples; + + /// Name of the IR file that contains the result of the OpenMP target + /// host code generation. + std::string OMPHostIRFile; + + /// Indicates whether the front-end is explicitly told that the + /// input is a header file (i.e. -x c-header). + bool IsHeaderFile = false; /// SYCL integration header to be generated by the device compiler std::string SYCLIntHeader; diff --git a/clang/include/clang/Basic/TargetOptions.def b/clang/include/clang/Basic/TargetOptions.def deleted file mode 100644 index 33e746f012cea..0000000000000 --- a/clang/include/clang/Basic/TargetOptions.def +++ /dev/null @@ -1,88 +0,0 @@ -//===--- TargetOptions.def - Target option database -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the target options. Users of this file must -// define the TYPED_TARGETOPT macro to make use of this information. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_TARGETOPT -#error Define the TYPED_TARGETOPT macro to handle target options -#endif - -TYPED_TARGETOPT(std::string, Triple, - "The name of the target triple to compile for.") - -TYPED_TARGETOPT( - std::string, HostTriple, - "When compiling for the device side, contains the triple used to " - "compile for the host.") - -TYPED_TARGETOPT(std::string, CPU, - "If given, the name of the target CPU to generate code for.") - -TYPED_TARGETOPT(std::string, TuneCPU, - "If given, the name of the target CPU to tune code for.") - -TYPED_TARGETOPT(std::string, FPMath, - "If given, the unit to use for floating point math.") - -TYPED_TARGETOPT(std::string, ABI, - "If given, the name of the target ABI to use.") - -TYPED_TARGETOPT(llvm::EABI, EABIVersion, "The EABI version to use.") - -TYPED_TARGETOPT(std::string, LinkerVersion, - "If given, the version string of the linker in use.") - -TYPED_TARGETOPT(std::vector, FeaturesAsWritten, - "The list of target specific features to enable or disable, as " - "written on the command line.") - -TYPED_TARGETOPT( - std::vector, Features, - "The list of target specific features to enable or disable -- this " - "should be a list of strings starting with by '+' or '-'.") - -TYPED_TARGETOPT(llvm::StringMap, FeatureMap, - "The map of which features have been enabled disabled based on " - "the command line.") - -TYPED_TARGETOPT(OpenCLOptions, SupportedOpenCLOptions, - "Supported OpenCL extensions and optional core features.") - -TYPED_TARGETOPT( - std::vector, OpenCLExtensionsAsWritten, - "The list of OpenCL extensions to enable or disable, as written on " - "the command line.") - -TYPED_TARGETOPT( - bool, ForceEnableInt128, - "If given, enables support for __int128_t and __uint128_t types.") - -TYPED_TARGETOPT( - bool, NVPTXUseShortPointers, - "If enabled, use 32-bit pointers for accessing const/local/shared " - "address space.") - 
-TYPED_TARGETOPT( - std::string, CodeModel, - "The code model to be used as specified by the user. Corresponds to " - "CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, " - "plus \"default\" for the case when the user has not explicitly " - "specified a code model.") - -TYPED_TARGETOPT( - llvm::VersionTuple, SDKVersion, - "The version of the SDK which was used during the compilation. The option " - "is used for two different purposes. On Darwin the version is propagated " - "to LLVM where it's used to support SDK Version metadata (See D55673). " - "CUDA compilation uses it to control parts of CUDA compilation in clang " - "that depend on specific version of the CUDA SDK.") - -#undef TYPED_TARGETOPT diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 1771c3bdbb611..d1cc024957dae 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -25,9 +25,69 @@ namespace clang { /// Options for controlling the target. class TargetOptions { public: -#define TYPED_TARGETOPT(Type, Name, Description) Type Name; -#include "clang/Basic/TargetOptions.def" - TargetOptions() : ForceEnableInt128(false), NVPTXUseShortPointers(false) {} + /// The name of the target triple to compile for. + std::string Triple; + + /// When compiling for the device side, contains the triple used to compile + /// for the host. + std::string HostTriple; + + /// If given, the name of the target CPU to generate code for. + std::string CPU; + + /// If given, the name of the target CPU to tune code for. + std::string TuneCPU; + + /// If given, the unit to use for floating point math. + std::string FPMath; + + /// If given, the name of the target ABI to use. + std::string ABI; + + /// The EABI version to use + llvm::EABI EABIVersion; + + /// If given, the version string of the linker in use. 
+ std::string LinkerVersion; + + /// The list of target specific features to enable or disable, as written on the command line. + std::vector FeaturesAsWritten; + + /// The list of target specific features to enable or disable -- this should + /// be a list of strings starting with by '+' or '-'. + std::vector Features; + + /// The map of which features have been enabled disabled based on the command + /// line. + llvm::StringMap FeatureMap; + + /// Supported OpenCL extensions and optional core features. + OpenCLOptions SupportedOpenCLOptions; + + /// The list of OpenCL extensions to enable or disable, as written on + /// the command line. + std::vector OpenCLExtensionsAsWritten; + + /// If given, enables support for __int128_t and __uint128_t types. + bool ForceEnableInt128 = false; + + /// \brief If enabled, use 32-bit pointers for accessing const/local/shared + /// address space. + bool NVPTXUseShortPointers = false; + + // The code model to be used as specified by the user. Corresponds to + // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus + // "default" for the case when the user has not explicitly specified a + // code model. + std::string CodeModel; + + /// The version of the SDK which was used during the compilation. + /// The option is used for two different purposes: + /// * on darwin the version is propagated to LLVM where it's used + /// to support SDK Version metadata (See D55673). + /// * CUDA compilation uses it to control parts of CUDA compilation + /// in clang that depend on specific version of the CUDA SDK. + llvm::VersionTuple SDKVersion; }; } // end namespace clang diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 8634994bbfe68..39daf1cd3a8ec 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -830,6 +830,11 @@ PRAGMA_ANNOTATION(pragma_fp_contract) // handles them. 
PRAGMA_ANNOTATION(pragma_fenv_access) +// Annotation for #pragma STDC FENV_ROUND +// The lexer produces these so that they only take effect when the parser +// handles them. +PRAGMA_ANNOTATION(pragma_fenv_round) + // Annotation for #pragma float_control // The lexer produces these so that they only take effect when the parser // handles them. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cebbb27609297..d7e45b41c6a06 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1014,7 +1014,7 @@ defm cxx_static_destructors : OptOutFFlag<"c++-static-destructors", "", def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Flags<[CC1Option]>; -defm memprof : OptInFFlag<"memprof", "Enable", "Disable", " heap memory profiling">; +defm memory_profile : OptInFFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">; // Begin sanitizer flags. These should all be core options exposed in all driver // modes. 
@@ -2006,6 +2006,17 @@ def fvisibility_EQ : Joined<["-"], "fvisibility=">, Group, def fvisibility_inlines_hidden : Flag<["-"], "fvisibility-inlines-hidden">, Group, HelpText<"Give inline C++ member functions hidden visibility by default">, Flags<[CC1Option]>; +def fvisibility_inlines_hidden_static_local_var : + Flag<["-"], "fvisibility-inlines-hidden-static-local-var">, Group, + HelpText<"When -fvisibility-inlines-hidden is enabled, static variables in " + "inline C++ member functions will also be given hidden visibility " + "by default">, + Flags<[CC1Option]>; +def fno_visibility_inlines_hidden_static_local_var : + Flag<["-"], "fno-visibility-inlines-hidden-static-local-var">, Group, + HelpText<"Disables -fvisibility-inlines-hidden-static-local-var " + "(this is the default on non-darwin targets)">, + Flags<[CC1Option]>; def fvisibility_ms_compat : Flag<["-"], "fvisibility-ms-compat">, Group, HelpText<"Give global types 'default' visibility and global functions and " "variables 'hidden' visibility by default">; @@ -2812,7 +2823,7 @@ def no_pie : Flag<["-"], "no-pie">, Alias; def noprebind : Flag<["-"], "noprebind">; def noprofilelib : Flag<["-"], "noprofilelib">; def noseglinkedit : Flag<["-"], "noseglinkedit">; -def nostartfiles : Flag<["-"], "nostartfiles">; +def nostartfiles : Flag<["-"], "nostartfiles">, Group; def nostdinc : Flag<["-"], "nostdinc">, Flags<[CoreOption]>; def nostdlibinc : Flag<["-"], "nostdlibinc">; def nostdincxx : Flag<["-"], "nostdinc++">, Flags<[CC1Option]>, @@ -2915,7 +2926,7 @@ def segs__read__ : Joined<["-"], "segs_read_">; def shared_libgcc : Flag<["-"], "shared-libgcc">; def shared : Flag<["-", "--"], "shared">, Group; def single__module : Flag<["-"], "single_module">; -def specs_EQ : Joined<["-", "--"], "specs=">; +def specs_EQ : Joined<["-", "--"], "specs=">, Group; def specs : Separate<["-", "--"], "specs">, Flags<[Unsupported]>; def static_libgcc : Flag<["-"], "static-libgcc">; def static_libstdcxx : Flag<["-"], 
"static-libstdc++">; @@ -4782,6 +4793,9 @@ def _SLASH_openmp : CLFlag<"openmp">, HelpText<"Enable OpenMP support">, def _SLASH_openmp_experimental : CLFlag<"openmp:experimental">, HelpText<"Enable OpenMP support with experimental SIMD support">, Alias; +def _SLASH_tune : CLCompileJoined<"tune:">, + HelpText<"Set CPU for optimization without affecting instruction set">, + Alias; // Non-aliases: diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 269eab971a2cb..6bb828d60071f 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -583,6 +583,24 @@ struct FormatStyle { /// The template declaration breaking style to use. BreakTemplateDeclarationsStyle AlwaysBreakTemplateDeclarations; + /// A vector of strings that should be interpreted as attributes/qualifiers + /// instead of identifiers. This can be useful for language extensions or + /// static analyzer annotations. + /// + /// For example: + /// \code + /// x = (char *__capability)&y; + /// int function(void) __ununsed; + /// void only_writes_to_buffer(char *__output buffer); + /// \endcode + /// + /// In the .clang-format configuration file, this can be configured like: + /// \code{.yaml} + /// AttributeMacros: ['__capability', '__output', '__ununsed'] + /// \endcode + /// + std::vector AttributeMacros; + /// If ``false``, a function call's arguments will either be all on the /// same line or will have one line each. 
/// \code @@ -2351,6 +2369,7 @@ struct FormatStyle { R.AlwaysBreakBeforeMultilineStrings && AlwaysBreakTemplateDeclarations == R.AlwaysBreakTemplateDeclarations && + AttributeMacros == R.AttributeMacros && BinPackArguments == R.BinPackArguments && BinPackParameters == R.BinPackParameters && BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators && diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index c8a95ae69d72b..c723fc084c854 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -19,10 +19,9 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" -#include "clang/Sema/CodeCompleteOptions.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/ArrayRef.h" #include #include @@ -87,9 +86,6 @@ class CompilerInvocationBase { LangOptions *getLangOpts() { return LangOpts.get(); } const LangOptions *getLangOpts() const { return LangOpts.get(); } - CommentOptions &getCommentOpts() { return LangOpts->CommentOpts; } - const CommentOptions &getCommentOpts() const { return LangOpts->CommentOpts; } - TargetOptions &getTargetOpts() { return *TargetOpts.get(); } const TargetOptions &getTargetOpts() const { return *TargetOpts.get(); } @@ -231,14 +227,6 @@ class CompilerInvocation : public CompilerInvocationBase { FrontendOptions &getFrontendOpts() { return FrontendOpts; } const FrontendOptions &getFrontendOpts() const { return FrontendOpts; } - CodeCompleteOptions &getCodeCompleteOpts() { - return FrontendOpts.CodeCompleteOpts; - } - - const CodeCompleteOptions &getCodeCompleteOpts() const { - return FrontendOpts.CodeCompleteOpts; - } - PreprocessorOutputOptions &getPreprocessorOutputOpts() { return PreprocessorOutputOpts; } diff --git 
a/clang/include/clang/Frontend/DependencyOutputOptions.def b/clang/include/clang/Frontend/DependencyOutputOptions.def deleted file mode 100644 index c018e900fc106..0000000000000 --- a/clang/include/clang/Frontend/DependencyOutputOptions.def +++ /dev/null @@ -1,52 +0,0 @@ -//===--- DependencyOutputOptions.def -------------------------------C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the DependencyOutput options, to use this file one needs -// to define the TYPED_DEPENDENCY_OUTPUTOPT and/or the DEPENDECY_OUTPUTOPT macro -// to get more information about bitfields. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_DEPENDENCY_OUTPUTOPT -#define TYPED_DEPENDENCY_OUTPUTOPT(Type, Name, Description) -#endif - -#ifndef DEPENDENCY_OUTPUTOPT -#define DEPENDENCY_OUTPUTOPT(Name, Bits, Description) \ - TYPED_DEPENDENCY_OUTPUTOPT(unsigned, Name, Description) -#endif - -DEPENDENCY_OUTPUTOPT(IncludeSystemHeaders, 1, "Include system header dependencies.") -DEPENDENCY_OUTPUTOPT(ShowHeaderIncludes, 1, "Show header inclusions (-H).") -DEPENDENCY_OUTPUTOPT(UsePhonyTargets, 1, "Include phony targets for each dependency, which can " - "avoid some 'make' problems.") -DEPENDENCY_OUTPUTOPT(AddMissingHeaderDeps, 1, "Add missing headers to dependency list.") -DEPENDENCY_OUTPUTOPT(IncludeModuleFiles, 1, "Include module file dependencies.") - -TYPED_DEPENDENCY_OUTPUTOPT(ShowIncludesDestination, ShowIncludesDest, "Destination of cl.exe style /showIncludes info.") - -TYPED_DEPENDENCY_OUTPUTOPT(DependencyOutputFormat, OutputFormat, "The format for the dependency file") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, OutputFile, "The file to write dependency output 
to.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, DependencyFilter, "Dependency output which is prefixed with this string is filtered from the dependency output.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, HeaderIncludeOutputFile, "The file to write header include output to. This is orthogonal to ShowHeaderIncludes (-H) and will include headers mentioned in the predefines buffer. If the output file is \"-\", output will be sent to stderr.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::vector, Targets, "A list of names to use as the targets in the dependency file; this list must contain at least one entry.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::vector, ExtraDeps, "A list of filenames to be used as extra dependencies for every target.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, ShowIncludesPretendHeader, "In /showIncludes mode, pretend the main TU is a header with this name.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, DOTOutputFile, "The file to write GraphViz-formatted header dependencies to.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, ModuleDependencyOutputDir, "The directory to copy module dependencies to when collecting them.") - -#undef TYPED_DEPENDENCY_OUTPUTOPT -#undef DEPENDENCY_OUTPUTOPT \ No newline at end of file diff --git a/clang/include/clang/Frontend/DependencyOutputOptions.h b/clang/include/clang/Frontend/DependencyOutputOptions.h index 581e9b5a544b8..4480171ce212c 100644 --- a/clang/include/clang/Frontend/DependencyOutputOptions.h +++ b/clang/include/clang/Frontend/DependencyOutputOptions.h @@ -24,15 +24,53 @@ enum class DependencyOutputFormat { Make, NMake }; /// file generation. class DependencyOutputOptions { public: -#define TYPED_DEPENDENCY_OUTPUTOPT(Type, Name, Description) Type Name; -#define DEPENDENCY_OUTPUTOPT(Name, Bits, Description) unsigned Name : Bits; -#include "clang/Frontend/DependencyOutputOptions.def" + unsigned IncludeSystemHeaders : 1; ///< Include system header dependencies. 
+ unsigned ShowHeaderIncludes : 1; ///< Show header inclusions (-H). + unsigned UsePhonyTargets : 1; ///< Include phony targets for each + /// dependency, which can avoid some 'make' + /// problems. + unsigned AddMissingHeaderDeps : 1; ///< Add missing headers to dependency list + unsigned IncludeModuleFiles : 1; ///< Include module file dependencies. + + /// Destination of cl.exe style /showIncludes info. + ShowIncludesDestination ShowIncludesDest = ShowIncludesDestination::None; + + /// The format for the dependency file. + DependencyOutputFormat OutputFormat = DependencyOutputFormat::Make; + + /// The file to write dependency output to. + std::string OutputFile; + + /// The file to write header include output to. This is orthogonal to + /// ShowHeaderIncludes (-H) and will include headers mentioned in the + /// predefines buffer. If the output file is "-", output will be sent to + /// stderr. + std::string HeaderIncludeOutputFile; + + /// A list of names to use as the targets in the dependency file; this list + /// must contain at least one entry. + std::vector Targets; + + /// A list of filenames to be used as extra dependencies for every target. + std::vector ExtraDeps; + + /// In /showIncludes mode, pretend the main TU is a header with this name. + std::string ShowIncludesPretendHeader; + + /// The file to write GraphViz-formatted header dependencies to. + std::string DOTOutputFile; + + /// The directory to copy module dependencies to when collecting them. + std::string ModuleDependencyOutputDir; + + /// Dependency output which is prefixed with this string is filtered from + /// the dependency output. 
+ std::string DependencyFilter; + public: DependencyOutputOptions() : IncludeSystemHeaders(0), ShowHeaderIncludes(0), UsePhonyTargets(0), - AddMissingHeaderDeps(0), IncludeModuleFiles(0), - ShowIncludesDest(ShowIncludesDestination::None), - OutputFormat(DependencyOutputFormat::Make) {} + AddMissingHeaderDeps(0), IncludeModuleFiles(0) {} }; } // end namespace clang diff --git a/clang/include/clang/Frontend/FrontendOptions.def b/clang/include/clang/Frontend/FrontendOptions.def deleted file mode 100644 index c6188d9cf0255..0000000000000 --- a/clang/include/clang/Frontend/FrontendOptions.def +++ /dev/null @@ -1,179 +0,0 @@ -//===--- FrontendOptions.def - FileSystem option database -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the Frontend options. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_FRONTENDOPT -#define TYPED_FRONTENDOPT(Type, Name, Description) -#endif - -#ifndef FRONTENDOPT -#define FRONTENDOPT(Name, Bits, Description) \ - TYPED_FRONTENDOPT(unsigned, Name, Description) -#endif - -FRONTENDOPT(DisableFree, 1, "Disable memory freeing on exit.") - -FRONTENDOPT(RelocatablePCH, 1, - "When generating PCH files, instruct the AST writer to create " - "relocatable PCH files.") - -FRONTENDOPT(ShowHelp, 1, "Show the -help text.") - -FRONTENDOPT(ShowStats, 1, "Show frontend performance metrics and statistics.") - -FRONTENDOPT(ShowTimers, 1, "Show timers for individual actions.") - -FRONTENDOPT(PrintSupportedCPUs, 1, - "print the supported cpus for the current target") - -FRONTENDOPT(TimeTrace, 1, "Output time trace profile.") - -FRONTENDOPT(ShowVersion, 1, "Show the -version text.") - -FRONTENDOPT(FixWhatYouCan, 1, "Apply fixes even if there are unfixable errors.") - -FRONTENDOPT(FixOnlyWarnings, 1, "Apply fixes only for warnings.") - -FRONTENDOPT(FixAndRecompile, 1, "Apply fixes and recompile.") - -FRONTENDOPT(FixToTemporaries, 1, "Apply fixes to temporary files.") - -FRONTENDOPT(ARCMTAction, 3, "") - -FRONTENDOPT(ARCMTMigrateEmitARCErrors, 1, - "Emit ARC errors even if the migrator can fix them.") - -FRONTENDOPT(SkipFunctionBodies, 1, - "Skip over function bodies to speed up parsing in cases you do not " - "need them (e.g. 
with code completion).") - -FRONTENDOPT(UseGlobalModuleIndex, 1, - "Whether we can use the global module index if available.") - -FRONTENDOPT(GenerateGlobalModuleIndex, 1, - "Whether we can generate the global module index if needed.") - -FRONTENDOPT(ASTDumpDecls, 1, - "Whether we include declaration dumps in AST dumps.") - -FRONTENDOPT(ASTDumpAll, 1, - "Whether we deserialize all decls when forming AST dumps.") - -FRONTENDOPT(ASTDumpLookups, 1, - "Whether we include lookup table dumps in AST dumps.") - -FRONTENDOPT(ASTDumpDeclTypes, 1, - "Whether we include declaration type dumps in AST dumps.") - -FRONTENDOPT(BuildingImplicitModule, 1, - "Whether we are performing an implicit module build.") - -FRONTENDOPT(ModulesEmbedAllFiles, 1, - "Whether we should embed all used files into the PCM file.") - -FRONTENDOPT(IncludeTimestamps, 1, - "Whether timestamps should be written to the produced PCH file.") - -FRONTENDOPT(UseTemporary, 1, - "Should a temporary file be used during compilation.") - -FRONTENDOPT(IsSystemModule, 1, - "When using -emit-module, treat the modulemap as a system module.") - -TYPED_FRONTENDOPT(ASTDumpOutputFormat, ASTDumpFormat, - "Specifies the output format of the AST.") - -TYPED_FRONTENDOPT(unsigned, ObjCMTAction, "") - -TYPED_FRONTENDOPT(std::string, ObjCMTWhiteListPath, "") - -TYPED_FRONTENDOPT(std::string, MTMigrateDir, "") - -TYPED_FRONTENDOPT(std::string, ARCMTMigrateReportOut, "") - -TYPED_FRONTENDOPT(InputsTy, Inputs, "The input files and their types.") - -TYPED_FRONTENDOPT( - std::string, OriginalModuleMap, - "When the input is a module map, the original module map file from which " - "that map was inferred, if any (for umbrella modules).") - -TYPED_FRONTENDOPT(std::string, OutputFile, "The output file, if any.") - -TYPED_FRONTENDOPT(std::string, FixItSuffix, - "If given, the new suffix for fix-it rewritten files.") - -TYPED_FRONTENDOPT(std::string, ASTDumpFilter, - "If given, filter dumped AST Decl nodes by this substring.") - 
-TYPED_FRONTENDOPT(ParsedSourceLocation, CodeCompletionAt, - "If given, enable code completion at the provided location.") - -TYPED_FRONTENDOPT(frontend::ActionKind, ProgramAction, - "The frontend action to perform.") - -TYPED_FRONTENDOPT(std::string, ActionName, - "The name of the action to run when using a plugin action.") - -TYPED_FRONTENDOPT(PluginArgsTy, PluginArgs, "Args to pass to the plugins") - -TYPED_FRONTENDOPT( - std::vector, AddPluginActions, - "The list of plugin actions to run in addition to the normal action.") - -TYPED_FRONTENDOPT(std::vector, Plugins, - "The list of plugins to load.") - -TYPED_FRONTENDOPT(std::vector>, - ModuleFileExtensions, "The list of module file extensions.") - -TYPED_FRONTENDOPT( - std::vector, ModuleMapFiles, - "The list of module map files to load before processing the input.") - -TYPED_FRONTENDOPT(std::vector, ModuleFiles, - "The list of additional prebuilt module files to load before " - "processing the input.") - -TYPED_FRONTENDOPT(std::vector, ModulesEmbedFiles, - "The list of files to embed into the compiled module file.") - -TYPED_FRONTENDOPT(std::vector, ASTMergeFiles, - "The list of AST files to merge.") - -TYPED_FRONTENDOPT( - std::vector, LLVMArgs, - "A list of arguments to forward to LLVM's option processing; this should " - "only be used for debugging and experimental features.") - -TYPED_FRONTENDOPT(std::string, OverrideRecordLayoutsFile, - "File name of the file that will provide record layouts (in " - "the format produced by -fdump-record-layouts).") - -TYPED_FRONTENDOPT(std::string, AuxTriple, - "Auxiliary triple for CUDA/HIP compilation.") - -TYPED_FRONTENDOPT(Optional, AuxTargetCPU, - "Auxiliary target CPU for CUDA/HIP compilation.") - -TYPED_FRONTENDOPT(Optional>, AuxTargetFeatures, - "Auxiliary target features for CUDA/HIP compilation.") - -TYPED_FRONTENDOPT(std::string, StatsFile, "Filename to write statistics to.") - -TYPED_FRONTENDOPT( - unsigned, TimeTraceGranularity, - "Minimum time granularity (in 
microseconds) traced by time profiler.") - -TYPED_FRONTENDOPT(InputKind, DashX, "Input Kind") - -#undef TYPED_FRONTENDOPT -#undef FRONTENDOPT \ No newline at end of file diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 5dccdf50ca462..b2be33032c08d 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -226,14 +226,94 @@ class FrontendInputFile { /// FrontendOptions - Options for controlling the behavior of the frontend. class FrontendOptions { public: - using PluginArgsTy = - std::unordered_map>; + /// Disable memory freeing on exit. + unsigned DisableFree : 1; - using InputsTy = llvm::SmallVector; + /// When generating PCH files, instruct the AST writer to create relocatable + /// PCH files. + unsigned RelocatablePCH : 1; + + /// Show the -help text. + unsigned ShowHelp : 1; + + /// Show frontend performance metrics and statistics. + unsigned ShowStats : 1; + + /// Show timers for individual actions. + unsigned ShowTimers : 1; + + /// print the supported cpus for the current target + unsigned PrintSupportedCPUs : 1; + + /// Output time trace profile. + unsigned TimeTrace : 1; + + /// Show the -version text. + unsigned ShowVersion : 1; + + /// Apply fixes even if there are unfixable errors. + unsigned FixWhatYouCan : 1; + + /// Apply fixes only for warnings. + unsigned FixOnlyWarnings : 1; + + /// Apply fixes and recompile. + unsigned FixAndRecompile : 1; + + /// Apply fixes to temporary files. + unsigned FixToTemporaries : 1; + + /// Emit ARC errors even if the migrator can fix them. + unsigned ARCMTMigrateEmitARCErrors : 1; + + /// Skip over function bodies to speed up parsing in cases you do not need + /// them (e.g. with code completion). + unsigned SkipFunctionBodies : 1; + + /// Whether we can use the global module index if available. 
+ unsigned UseGlobalModuleIndex : 1; + + /// Whether we can generate the global module index if needed. + unsigned GenerateGlobalModuleIndex : 1; + + /// Whether we include declaration dumps in AST dumps. + unsigned ASTDumpDecls : 1; + + /// Whether we deserialize all decls when forming AST dumps. + unsigned ASTDumpAll : 1; + + /// Whether we include lookup table dumps in AST dumps. + unsigned ASTDumpLookups : 1; + + /// Whether we include declaration type dumps in AST dumps. + unsigned ASTDumpDeclTypes : 1; + + /// Whether we are performing an implicit module build. + unsigned BuildingImplicitModule : 1; + + /// Whether we should embed all used files into the PCM file. + unsigned ModulesEmbedAllFiles : 1; + + /// Whether timestamps should be written to the produced PCH file. + unsigned IncludeTimestamps : 1; + + /// Should a temporary file be used during compilation. + unsigned UseTemporary : 1; + + /// When using -emit-module, treat the modulemap as a system module. + unsigned IsSystemModule : 1; CodeCompleteOptions CodeCompleteOpts; - enum { ARCMT_None, ARCMT_Check, ARCMT_Modify, ARCMT_Migrate }; + /// Specifies the output format of the AST. + ASTDumpOutputFormat ASTDumpFormat = ADOF_Default; + + enum { + ARCMT_None, + ARCMT_Check, + ARCMT_Modify, + ARCMT_Migrate + } ARCMTAction = ARCMT_None; enum { ObjCMT_None = 0, @@ -280,18 +360,92 @@ class FrontendOptions { /// Enable converting setter/getter expressions to property-dot syntx. 
ObjCMT_PropertyDotSyntax = 0x1000, - ObjCMT_MigrateDecls = - (ObjCMT_ReadonlyProperty | ObjCMT_ReadwriteProperty | - ObjCMT_Annotation | ObjCMT_Instancetype | ObjCMT_NsMacros | - ObjCMT_ProtocolConformance | ObjCMT_NsAtomicIOSOnlyProperty | - ObjCMT_DesignatedInitializer), + ObjCMT_MigrateDecls = (ObjCMT_ReadonlyProperty | ObjCMT_ReadwriteProperty | + ObjCMT_Annotation | ObjCMT_Instancetype | + ObjCMT_NsMacros | ObjCMT_ProtocolConformance | + ObjCMT_NsAtomicIOSOnlyProperty | + ObjCMT_DesignatedInitializer), ObjCMT_MigrateAll = (ObjCMT_Literals | ObjCMT_Subscripting | ObjCMT_MigrateDecls | ObjCMT_PropertyDotSyntax) }; + unsigned ObjCMTAction = ObjCMT_None; + std::string ObjCMTWhiteListPath; + + std::string MTMigrateDir; + std::string ARCMTMigrateReportOut; + + /// The input files and their types. + SmallVector Inputs; + + /// When the input is a module map, the original module map file from which + /// that map was inferred, if any (for umbrella modules). + std::string OriginalModuleMap; + + /// The output file, if any. + std::string OutputFile; + + /// If given, the new suffix for fix-it rewritten files. + std::string FixItSuffix; + + /// If given, filter dumped AST Decl nodes by this substring. + std::string ASTDumpFilter; + + /// If given, enable code completion at the provided location. + ParsedSourceLocation CodeCompletionAt; + + /// The frontend action to perform. + frontend::ActionKind ProgramAction = frontend::ParseSyntaxOnly; + + /// The name of the action to run when using a plugin action. + std::string ActionName; + + /// Args to pass to the plugins + std::unordered_map> PluginArgs; + + /// The list of plugin actions to run in addition to the normal action. + std::vector AddPluginActions; + + /// The list of plugins to load. + std::vector Plugins; + + /// The list of module file extensions. + std::vector> ModuleFileExtensions; + + /// The list of module map files to load before processing the input. 
+ std::vector ModuleMapFiles; + + /// The list of additional prebuilt module files to load before + /// processing the input. + std::vector ModuleFiles; + + /// The list of files to embed into the compiled module file. + std::vector ModulesEmbedFiles; + + /// The list of AST files to merge. + std::vector ASTMergeFiles; + + /// A list of arguments to forward to LLVM's option processing; this + /// should only be used for debugging and experimental features. + std::vector LLVMArgs; + + /// File name of the file that will provide record layouts + /// (in the format produced by -fdump-record-layouts). + std::string OverrideRecordLayoutsFile; + + /// Auxiliary triple for CUDA/HIP compilation. + std::string AuxTriple; + + /// Auxiliary target CPU for CUDA/HIP compilation. + Optional AuxTargetCPU; + + /// Auxiliary target features for CUDA/HIP compilation. + Optional> AuxTargetFeatures; + + /// Filename to write statistics to. + std::string StatsFile; -#define FRONTENDOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_FRONTENDOPT(Type, Name, Description) Type Name; -#include "clang/Frontend/FrontendOptions.def" + /// Minimum time granularity (in microseconds) traced by time profiler. 
+ unsigned TimeTraceGranularity; public: FrontendOptions() @@ -299,14 +453,11 @@ class FrontendOptions { ShowStats(false), ShowTimers(false), TimeTrace(false), ShowVersion(false), FixWhatYouCan(false), FixOnlyWarnings(false), FixAndRecompile(false), FixToTemporaries(false), - ARCMTAction(ARCMT_None), ARCMTMigrateEmitARCErrors(false), - SkipFunctionBodies(false), UseGlobalModuleIndex(true), - GenerateGlobalModuleIndex(true), ASTDumpDecls(false), - ASTDumpLookups(false), BuildingImplicitModule(false), - ModulesEmbedAllFiles(false), IncludeTimestamps(true), - UseTemporary(true), ASTDumpFormat(ADOF_Default), - ObjCMTAction(ObjCMT_None), ProgramAction(frontend::ParseSyntaxOnly), - TimeTraceGranularity(500), DashX() {} + ARCMTMigrateEmitARCErrors(false), SkipFunctionBodies(false), + UseGlobalModuleIndex(true), GenerateGlobalModuleIndex(true), + ASTDumpDecls(false), ASTDumpLookups(false), + BuildingImplicitModule(false), ModulesEmbedAllFiles(false), + IncludeTimestamps(true), UseTemporary(true), TimeTraceGranularity(500) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Frontend/MigratorOptions.def b/clang/include/clang/Frontend/MigratorOptions.def deleted file mode 100644 index fbbcc6b686fdf..0000000000000 --- a/clang/include/clang/Frontend/MigratorOptions.def +++ /dev/null @@ -1,27 +0,0 @@ -//===--- MigratorOptions.def - Migrator option database ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the migrator options. Users of this file must -// define the TYPED_MIGRATOROPT macro to make use of this information. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_MIGRATOROPT -#define TYPED_MIGRATOROPT(Type, Name, Description) -#endif - -#ifndef MIGRATOROPT -#define MIGRATOROPT(Name, Bits, Description) \ - TYPED_MIGRATOROPT(unsigned, Name, Description) -#endif - -MIGRATOROPT(NoNSAllocReallocError, 1, "") -MIGRATOROPT(NoFinalizeRemoval, 1, "") - -#undef TYPED_MIGRATOROPT -#undef MIGRATOROPT diff --git a/clang/include/clang/Frontend/MigratorOptions.h b/clang/include/clang/Frontend/MigratorOptions.h index f5ee9bba9dec0..cf50ffcf0c4f5 100644 --- a/clang/include/clang/Frontend/MigratorOptions.h +++ b/clang/include/clang/Frontend/MigratorOptions.h @@ -18,10 +18,13 @@ namespace clang { class MigratorOptions { public: -#define MIGRATOROPT(Name, Bits, Description) unsigned Name : Bits; -#include "clang/Frontend/MigratorOptions.def" - - MigratorOptions() : NoNSAllocReallocError(0), NoFinalizeRemoval(0) {} + unsigned NoNSAllocReallocError : 1; + unsigned NoFinalizeRemoval : 1; + MigratorOptions() { + NoNSAllocReallocError = 0; + NoFinalizeRemoval = 0; + } }; + } #endif diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.def b/clang/include/clang/Frontend/PreprocessorOutputOptions.def deleted file mode 100644 index aad2f5eb7294b..0000000000000 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.def +++ /dev/null @@ -1,46 +0,0 @@ -//=== PreprocessorOutputOptions.def - FileSystem option database -*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the PreprocessorOutput options. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_PREPROCESSOR_OUTPUTOPT -#define TYPED_PREPROCESSOR_OUTPUTOPT(Type, Name, Description) -#endif - -#ifndef PREPROCESSOR_OUTPUTOPT -#define PREPROCESSOR_OUTPUTOPT(Name, Bits, Description) \ - TYPED_PREPROCESSOR_OUTPUTOPT(unsigned, Name, Description) -#endif - -PREPROCESSOR_OUTPUTOPT(ShowCPP, 1, "Print normal preprocessed output.") - -PREPROCESSOR_OUTPUTOPT(ShowComments, 1, "Show comments.") - -PREPROCESSOR_OUTPUTOPT(ShowLineMarkers, 1, "Show \#line markers.") - -PREPROCESSOR_OUTPUTOPT(UseLineDirectives, 1, - "Use \#line instead of GCC-style \# N.") - -PREPROCESSOR_OUTPUTOPT(ShowMacroComments, 1, "Show comments, even in macros.") - -PREPROCESSOR_OUTPUTOPT(ShowMacros, 1, "Print macro definitions.") - -PREPROCESSOR_OUTPUTOPT( - ShowIncludeDirectives, 1, - "Print includes, imports etc. within preprocessed output.") - -PREPROCESSOR_OUTPUTOPT(RewriteIncludes, 1, - "Preprocess include directives only.") - -PREPROCESSOR_OUTPUTOPT(RewriteImports, 1, - "Include contents of transitively-imported modules.") - -#undef TYPED_PREPROCESSOR_OUTPUTOPT -#undef PREPROCESSOR_OUTPUTOPT \ No newline at end of file diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index ab4f25e394a27..72e5ad1137fb7 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -15,9 +15,15 @@ namespace clang { /// output (e.g., -E). class PreprocessorOutputOptions { public: -#define PREPROCESSOR_OUTPUTOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_PREPROCESSOR_OUTPUTOPT(Type, Name, Description) Type Name; -#include "clang/Frontend/PreprocessorOutputOptions.def" + unsigned ShowCPP : 1; ///< Print normal preprocessed output. + unsigned ShowComments : 1; ///< Show comments. + unsigned ShowLineMarkers : 1; ///< Show \#line markers. 
+ unsigned UseLineDirectives : 1; ///< Use \#line instead of GCC-style \# N. + unsigned ShowMacroComments : 1; ///< Show comments, even in macros. + unsigned ShowMacros : 1; ///< Print macro definitions. + unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. + unsigned RewriteIncludes : 1; ///< Preprocess include directives only. + unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. public: PreprocessorOutputOptions() { diff --git a/clang/include/clang/Lex/HeaderSearchOptions.def b/clang/include/clang/Lex/HeaderSearchOptions.def deleted file mode 100644 index 79fd196c8f905..0000000000000 --- a/clang/include/clang/Lex/HeaderSearchOptions.def +++ /dev/null @@ -1,136 +0,0 @@ -//===--- HeaderSearchOptions.def - HeaderSearch option database -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the header search options. Users of this file must -// define the HEADERSEARCHOPT macro to make use of this information. -// -//===----------------------------------------------------------------------===// - -#ifndef HEADERSEARCHOPT -#define HEADERSEARCHOPT(Name, Bits, Description) \ - TYPED_HEADERSEARCHOPT(unsigned, Name, Description) -#endif - -#ifndef TYPED_HEADERSEARCHOPT -#define TYPED_HEADERSEARCHOPT(Type, Name, Description) -#endif - -TYPED_HEADERSEARCHOPT(std::string, Sysroot, - "If non-empty, the directory to use as a \"virtual " - "system root\" for include paths.") - -TYPED_HEADERSEARCHOPT(std::string, ModuleFormat, - "The module/pch container format.") - -HEADERSEARCHOPT(DisableModuleHash, 1, - "Whether we should disable the use of the hash string within " - "the module cache. 
Note: Only used for testing!") - -HEADERSEARCHOPT(ImplicitModuleMaps, 1, - "Implicit module maps. This option is enabld by default when " - "modules is enabled.") - -HEADERSEARCHOPT( - ModuleMapFileHomeIsCwd, 1, - "Set the 'home directory' of a module map file to the current working " - "directory (or the home directory of the module map file that contained " - "the 'extern module' directive importing this module map file if any) " - "rather than the directory containing the module map file. The home " - "directory is where we look for files named in the module map file.") - -HEADERSEARCHOPT(UseBuiltinIncludes, 1, "Include the compiler builtin includes.") - -HEADERSEARCHOPT(UseStandardSystemIncludes, 1, - "Include the system standard include search directories.") - -HEADERSEARCHOPT( - UseStandardCXXIncludes, 1, - "Include the system standard C++ library include search directories.") - -HEADERSEARCHOPT(UseLibcxx, 1, "Use libc++ instead of the default libstdc++.") - -HEADERSEARCHOPT(Verbose, 1, - "Whether header search information should be output as for -v.") - -HEADERSEARCHOPT( - ModulesValidateOncePerBuildSession, 1, - "If true, skip verifying input files used by modules if the module was " - "already verified during this build session (see BuildSessionTimestamp).") - -HEADERSEARCHOPT( - ModulesValidateSystemHeaders, 1, - "Whether to validate system input files when a module is loaded.") - -HEADERSEARCHOPT(ValidateASTInputFilesContent, 1, - "Whether the content of input files should be hashed and used " - "to validate consistency.") - -HEADERSEARCHOPT(UseDebugInfo, 1, - "Whether the module includes debug information (-gmodules).") - -HEADERSEARCHOPT(ModulesValidateDiagnosticOptions, 1, "") - -HEADERSEARCHOPT(ModulesHashContent, 1, "") - -HEADERSEARCHOPT(ModulesStrictContextHash, 1, - "Whether we should include all things that could impact the " - "module in the hash. 
This includes things like the full header " - "search path, and enabled diagnostics.") - -TYPED_HEADERSEARCHOPT(std::vector, UserEntries, - "User specified include entries.") - -TYPED_HEADERSEARCHOPT(std::vector, SystemHeaderPrefixes, - "User-specified system header prefixes.") - -TYPED_HEADERSEARCHOPT(std::string, ResourceDir, - "The directory which holds the compiler resource files " - "(builtin includes, etc.).") - -TYPED_HEADERSEARCHOPT(std::string, ModuleCachePath, - "The directory used for the module cache.") - -TYPED_HEADERSEARCHOPT(std::string, ModuleUserBuildPath, - "The directory used for a user build.") - -TYPED_HEADERSEARCHOPT(PrebuiltModuleFilesTy, PrebuiltModuleFiles, - "The mapping of module names to prebuilt module files.") - -TYPED_HEADERSEARCHOPT(std::vector, PrebuiltModulePaths, - "The directories used to load prebuilt module files.") - -TYPED_HEADERSEARCHOPT( - unsigned, ModuleCachePruneInterval, - "The interval (in seconds) between pruning operations. This operation is " - "expensive, because it requires Clang to walk through the directory " - "structure of the module cache, stat()'ing and removing files. The " - "default value is large, e.g., the operation runs once a week.") - -TYPED_HEADERSEARCHOPT( - unsigned, ModuleCachePruneAfter, - "The time (in seconds) after which an unused module file will be " - "considered unused and will, therefore, be pruned. When the module cache " - "is pruned, any module file that has not been accessed in this many " - "seconds will be removed. The default value is large, e.g., a month, to " - "avoid forcing infrequently-used modules to be regenerated often.") - -TYPED_HEADERSEARCHOPT( - uint64_t, BuildSessionTimestamp, - "The time in seconds when the build session started. 
This time is used " - "by other optimizations in header search and module loading.") - -TYPED_HEADERSEARCHOPT(ModulesIgnoreMacrosTy, ModulesIgnoreMacros, - "The set of macro names that should be ignored for the " - "purposes of computing the module hash.") - -TYPED_HEADERSEARCHOPT( - std::vector, VFSOverlayFiles, - "The set of user-provided virtual filesystem overlay files.") - -#undef HEADERSEARCHOPT -#undef TYPED_HEADERSEARCHOPT diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h index 41a7ca915d794..3af49e1753956 100644 --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -94,14 +94,125 @@ class HeaderSearchOptions { : Prefix(Prefix), IsSystemHeader(IsSystemHeader) {} }; - using PrebuiltModuleFilesTy = std::map>; + /// If non-empty, the directory to use as a "virtual system root" for include + /// paths. + std::string Sysroot; + + /// User specified include entries. + std::vector UserEntries; + + /// User-specified system header prefixes. + std::vector SystemHeaderPrefixes; + + /// The directory which holds the compiler resource files (builtin includes, + /// etc.). + std::string ResourceDir; - using ModulesIgnoreMacrosTy = - llvm::SmallSetVector; + /// The directory used for the module cache. + std::string ModuleCachePath; + + /// The directory used for a user build. + std::string ModuleUserBuildPath; + + /// The mapping of module names to prebuilt module files. + std::map> PrebuiltModuleFiles; + + /// The directories used to load prebuilt module files. + std::vector PrebuiltModulePaths; + + /// The module/pch container format. + std::string ModuleFormat; + + /// Whether we should disable the use of the hash string within the + /// module cache. + /// + /// Note: Only used for testing! + unsigned DisableModuleHash : 1; + + /// Implicit module maps. This option is enabld by default when + /// modules is enabled. 
+ unsigned ImplicitModuleMaps : 1; + + /// Set the 'home directory' of a module map file to the current + /// working directory (or the home directory of the module map file that + /// contained the 'extern module' directive importing this module map file + /// if any) rather than the directory containing the module map file. + // + /// The home directory is where we look for files named in the module map + /// file. + unsigned ModuleMapFileHomeIsCwd : 1; + + /// The interval (in seconds) between pruning operations. + /// + /// This operation is expensive, because it requires Clang to walk through + /// the directory structure of the module cache, stat()'ing and removing + /// files. + /// + /// The default value is large, e.g., the operation runs once a week. + unsigned ModuleCachePruneInterval = 7 * 24 * 60 * 60; + + /// The time (in seconds) after which an unused module file will be + /// considered unused and will, therefore, be pruned. + /// + /// When the module cache is pruned, any module file that has not been + /// accessed in this many seconds will be removed. The default value is + /// large, e.g., a month, to avoid forcing infrequently-used modules to be + /// regenerated often. + unsigned ModuleCachePruneAfter = 31 * 24 * 60 * 60; + + /// The time in seconds when the build session started. + /// + /// This time is used by other optimizations in header search and module + /// loading. + uint64_t BuildSessionTimestamp = 0; + + /// The set of macro names that should be ignored for the purposes + /// of computing the module hash. + llvm::SmallSetVector ModulesIgnoreMacros; + + /// The set of user-provided virtual filesystem overlay files. + std::vector VFSOverlayFiles; + + /// Include the compiler builtin includes. + unsigned UseBuiltinIncludes : 1; + + /// Include the system standard include search directories. + unsigned UseStandardSystemIncludes : 1; + + /// Include the system standard C++ library include search directories. 
+ unsigned UseStandardCXXIncludes : 1; + + /// Use libc++ instead of the default libstdc++. + unsigned UseLibcxx : 1; + + /// Whether header search information should be output as for -v. + unsigned Verbose : 1; + + /// If true, skip verifying input files used by modules if the + /// module was already verified during this build session (see + /// \c BuildSessionTimestamp). + unsigned ModulesValidateOncePerBuildSession : 1; + + /// Whether to validate system input files when a module is loaded. + unsigned ModulesValidateSystemHeaders : 1; -#define HEADERSEARCHOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_HEADERSEARCHOPT(Type, Name, Description) Type Name; -#include "clang/Lex/HeaderSearchOptions.def" + // Whether the content of input files should be hashed and used to + // validate consistency. + unsigned ValidateASTInputFilesContent : 1; + + /// Whether the module includes debug information (-gmodules). + unsigned UseDebugInfo : 1; + + unsigned ModulesValidateDiagnosticOptions : 1; + + unsigned ModulesHashContent : 1; + + /// Whether we should include all things that could impact the module in the + /// hash. + /// + /// This includes things like the full header search path, and enabled + /// diagnostics. + unsigned ModulesStrictContextHash : 1; HeaderSearchOptions(StringRef _Sysroot = "/") : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false), @@ -112,9 +223,7 @@ class HeaderSearchOptions { ModulesValidateSystemHeaders(false), ValidateASTInputFilesContent(false), UseDebugInfo(false), ModulesValidateDiagnosticOptions(true), ModulesHashContent(false), - ModulesStrictContextHash(false), - ModuleCachePruneInterval(7 * 24 * 60 * 60), - ModuleCachePruneAfter(31 * 24 * 60 * 60), BuildSessionTimestamp(0) {} + ModulesStrictContextHash(false) {} /// AddPath - Add the \p Path path to the specified \p Group list. 
void AddPath(StringRef Path, frontend::IncludeDirGroup Group, diff --git a/clang/include/clang/Lex/PreprocessorOptions.def b/clang/include/clang/Lex/PreprocessorOptions.def deleted file mode 100644 index 5b9e982351a0d..0000000000000 --- a/clang/include/clang/Lex/PreprocessorOptions.def +++ /dev/null @@ -1,166 +0,0 @@ -//===--- PreprocessorOptions.def - Preprocessor option database -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the preprocessor options. Users of this file must -// define the TYPED_PREPROCESSOROPT macro to make use of this information. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_PREPROCESSOROPT -#error Define the TYPED_PREPROCESSOROPT macro to handle target options -#endif - -TYPED_PREPROCESSOROPT(MacrosTy, Macros, "") - -TYPED_PREPROCESSOROPT(std::vector, Includes, "") - -TYPED_PREPROCESSOROPT(std::vector, MacroIncludes, "") - -TYPED_PREPROCESSOROPT( - bool, UsePredefines, - "Initialize the preprocessor with the compiler and target " - "specific predefines.") - -TYPED_PREPROCESSOROPT( - bool, DetailedRecord, - "Whether we should maintain a detailed record of all macro " - "definitions and expansions.") - -TYPED_PREPROCESSOROPT( - bool, PCHWithHdrStop, - "When true, we are creating or using a PCH where a #pragma hdrstop is " - "expected to indicate the beginning or end of the PCH.") - -TYPED_PREPROCESSOROPT( - bool, PCHWithHdrStopCreate, - "When true, we are creating a PCH or creating the PCH object " - "while expecting a #pragma hdrstop to separate the two. 
Allow " - "for a missing #pragma hdrstop, which generates a PCH for the " - "whole file, and creates an empty PCH object.") - -TYPED_PREPROCESSOROPT( - std::string, PCHThroughHeader, - "If non-empty, the filename used in an #include directive in the primary " - "source file (or command-line preinclude) that is used to implement " - "MSVC-style precompiled headers. When creating a PCH, after the #include " - "of this header, the PCH generation stops. When using a PCH, tokens are " - "skipped until after an #include of this header is seen.") - -TYPED_PREPROCESSOROPT( - std::string, ImplicitPCHInclude, - "The implicit PCH included at the start of the translation unit, or empty.") - -TYPED_PREPROCESSOROPT( - std::vector, ChainedIncludes, - "Headers that will be converted to chained PCHs in memory.") - -TYPED_PREPROCESSOROPT( - bool, DisablePCHValidation, - "When true, disables most of the normal validation performed " - "on precompiled headers.") - -TYPED_PREPROCESSOROPT( - bool, AllowPCHWithCompilerErrors, - "When true, a PCH with compiler errors will not be rejected.") - -TYPED_PREPROCESSOROPT( - bool, DumpDeserializedPCHDecls, - "Dump declarations that are deserialized from PCH, for testing.") - -TYPED_PREPROCESSOROPT( - std::set, DeserializedPCHDeclsToErrorOn, - "This is a set of names for decls that we do not want to be deserialized, " - "and we emit an error if they are; for testing purposes.") - -TYPED_PREPROCESSOROPT( - PrecompiledPreambleBytesTy, PrecompiledPreambleBytes, - "If non-zero, the implicit PCH include is actually a precompiled preamble " - "that covers this number of bytes in the main source file. The boolean " - "indicates whether the preamble ends at the start of a new line.") - -TYPED_PREPROCESSOROPT( - bool, GeneratePreamble, - "True indicates that a preamble is being generated. 
When the " - "lexer is done, one of the things that need to be preserved is " - "the conditional #if stack, so the ASTWriter/ASTReader can " - "save/restore it when processing the rest of the file.") - -TYPED_PREPROCESSOROPT( - bool, WriteCommentListToPCH, - "Whether to write comment locations into the PCH when building " - "it. Reading the comments from the PCH can be a performance " - "hit even if the clients don't use them.") - -TYPED_PREPROCESSOROPT( - bool, SingleFileParseMode, - "When enabled, preprocessor is in a mode for parsing a single " - "file only. Disables #includes of other files and if there are " - "unresolved identifiers in preprocessor directive conditions " - "it causes all blocks to be parsed so that the client can get " - "the maximum amount of information from the parser.") - -TYPED_PREPROCESSOROPT( - bool, LexEditorPlaceholders, - "When enabled, the preprocessor will construct editor placeholder tokens.") - -TYPED_PREPROCESSOROPT( - bool, RemappedFilesKeepOriginalName, - "True if the SourceManager should report the original file name for " - "contents of files that were remapped to other files. Defaults to true.") - -TYPED_PREPROCESSOROPT( - RemappedFilesTy, RemappedFiles, - "The set of file remappings, which take existing files on the system (the " - "first part of each pair) and gives them the contents of other files on " - "the system (the second part of each pair).") - -TYPED_PREPROCESSOROPT( - RemappedFileBuffersTy, RemappedFileBuffers, - "The set of file-to-buffer remappings, which take existing files on the " - "system (the first part of each pair) and gives them the contents of the " - "specified memory buffer (the second part of each pair).") - -TYPED_PREPROCESSOROPT( - bool, RetainRemappedFileBuffers, - "Whether the compiler instance should retain (i.e., not free) " - "the buffers associated with remapped files. 
This flag " - "defaults to false; it can be set true only through direct " - "manipulation of the compiler invocation object, in cases " - "where the compiler invocation and its buffers will be reused.") - -TYPED_PREPROCESSOROPT( - bool, RetainExcludedConditionalBlocks, - "When enabled, excluded conditional blocks retain in the main file.") - -TYPED_PREPROCESSOROPT( - ObjCXXARCStandardLibraryKind, ObjCXXARCStandardLibrary, - "The Objective-C++ ARC standard library that we should support, by " - "providing appropriate definitions to retrofit the standard library with " - "support for lifetime-qualified pointers.") - -TYPED_PREPROCESSOROPT(std::shared_ptr, FailedModules, "") - -TYPED_PREPROCESSOROPT(MacroPrefixMapTy, MacroPrefixMap, - "A prefix map for __FILE__ and __BASE_FILE__.") - -TYPED_PREPROCESSOROPT( - ExcludedPreprocessorDirectiveSkipMapping *, - ExcludedConditionalDirectiveSkipMappings, - "Contains the currently active skipped range mappings for " - "skipping excluded conditional directives. The pointer is " - "passed to the Preprocessor when it's constructed. The pointer " - "is unowned, the client is responsible for its lifetime.") - -TYPED_PREPROCESSOROPT(bool, SetUpStaticAnalyzer, - "Set up preprocessor for RunAnalysis action.") - -TYPED_PREPROCESSOROPT( - bool, DisablePragmaDebugCrash, - "Prevents intended crashes when using #pragma clang __debug. For testing.") - -#undef TYPED_PREPROCESSOROPT diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index f379d50532287..c551f87e0d7bf 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -44,13 +44,114 @@ enum ObjCXXARCStandardLibraryKind { /// used in preprocessor initialization to InitializePreprocessor(). 
class PreprocessorOptions { public: - using MacrosTy = std::vector>; - using PrecompiledPreambleBytesTy = std::pair; - using RemappedFilesTy = std::vector>; - using RemappedFileBuffersTy = - std::vector>; - using MacroPrefixMapTy = - std::map>; + std::vector> Macros; + std::vector Includes; + std::vector MacroIncludes; + + /// Initialize the preprocessor with the compiler and target specific + /// predefines. + bool UsePredefines = true; + + /// Whether we should maintain a detailed record of all macro + /// definitions and expansions. + bool DetailedRecord = false; + + /// When true, we are creating or using a PCH where a #pragma hdrstop is + /// expected to indicate the beginning or end of the PCH. + bool PCHWithHdrStop = false; + + /// When true, we are creating a PCH or creating the PCH object while + /// expecting a #pragma hdrstop to separate the two. Allow for a + /// missing #pragma hdrstop, which generates a PCH for the whole file, + /// and creates an empty PCH object. + bool PCHWithHdrStopCreate = false; + + /// If non-empty, the filename used in an #include directive in the primary + /// source file (or command-line preinclude) that is used to implement + /// MSVC-style precompiled headers. When creating a PCH, after the #include + /// of this header, the PCH generation stops. When using a PCH, tokens are + /// skipped until after an #include of this header is seen. + std::string PCHThroughHeader; + + /// The implicit PCH included at the start of the translation unit, or empty. + std::string ImplicitPCHInclude; + + /// Headers that will be converted to chained PCHs in memory. + std::vector ChainedIncludes; + + /// When true, disables most of the normal validation performed on + /// precompiled headers. + bool DisablePCHValidation = false; + + /// When true, a PCH with compiler errors will not be rejected. + bool AllowPCHWithCompilerErrors = false; + + /// Dump declarations that are deserialized from PCH, for testing. 
+ bool DumpDeserializedPCHDecls = false; + + /// This is a set of names for decls that we do not want to be + /// deserialized, and we emit an error if they are; for testing purposes. + std::set DeserializedPCHDeclsToErrorOn; + + /// If non-zero, the implicit PCH include is actually a precompiled + /// preamble that covers this number of bytes in the main source file. + /// + /// The boolean indicates whether the preamble ends at the start of a new + /// line. + std::pair PrecompiledPreambleBytes; + + /// True indicates that a preamble is being generated. + /// + /// When the lexer is done, one of the things that need to be preserved is the + /// conditional #if stack, so the ASTWriter/ASTReader can save/restore it when + /// processing the rest of the file. + bool GeneratePreamble = false; + + /// Whether to write comment locations into the PCH when building it. + /// Reading the comments from the PCH can be a performance hit even if the + /// clients don't use them. + bool WriteCommentListToPCH = true; + + /// When enabled, preprocessor is in a mode for parsing a single file only. + /// + /// Disables #includes of other files and if there are unresolved identifiers + /// in preprocessor directive conditions it causes all blocks to be parsed so + /// that the client can get the maximum amount of information from the parser. + bool SingleFileParseMode = false; + + /// When enabled, the preprocessor will construct editor placeholder tokens. + bool LexEditorPlaceholders = true; + + /// True if the SourceManager should report the original file name for + /// contents of files that were remapped to other files. Defaults to true. + bool RemappedFilesKeepOriginalName = true; + + /// The set of file remappings, which take existing files on + /// the system (the first part of each pair) and gives them the + /// contents of other files on the system (the second part of each + /// pair). 
+ std::vector> RemappedFiles; + + /// The set of file-to-buffer remappings, which take existing files + /// on the system (the first part of each pair) and gives them the contents + /// of the specified memory buffer (the second part of each pair). + std::vector> RemappedFileBuffers; + + /// Whether the compiler instance should retain (i.e., not free) + /// the buffers associated with remapped files. + /// + /// This flag defaults to false; it can be set true only through direct + /// manipulation of the compiler invocation object, in cases where the + /// compiler invocation and its buffers will be reused. + bool RetainRemappedFileBuffers = false; + + /// When enabled, excluded conditional blocks retain in the main file. + bool RetainExcludedConditionalBlocks = false; + + /// The Objective-C++ ARC standard library that we should support, + /// by providing appropriate definitions to retrofit the standard library + /// with support for lifetime-qualified pointers. + ObjCXXARCStandardLibraryKind ObjCXXARCStandardLibrary = ARCXX_nolib; /// Records the set of modules class FailedModulesSet { @@ -66,21 +167,33 @@ class PreprocessorOptions { } }; -#define TYPED_PREPROCESSOROPT(Type, Name, Description) Type Name; -#include "clang/Lex/PreprocessorOptions.def" - - PreprocessorOptions() - : UsePredefines(true), DetailedRecord(false), PCHWithHdrStop(false), - PCHWithHdrStopCreate(false), DisablePCHValidation(false), - AllowPCHWithCompilerErrors(false), DumpDeserializedPCHDecls(false), - PrecompiledPreambleBytes(0, false), GeneratePreamble(false), - WriteCommentListToPCH(true), SingleFileParseMode(false), - LexEditorPlaceholders(true), RemappedFilesKeepOriginalName(true), - RetainRemappedFileBuffers(false), - RetainExcludedConditionalBlocks(false), - ObjCXXARCStandardLibrary(ARCXX_nolib), - ExcludedConditionalDirectiveSkipMappings(nullptr), - SetUpStaticAnalyzer(false), DisablePragmaDebugCrash(false) {} + /// The set of modules that failed to build. 
+ /// + /// This pointer will be shared among all of the compiler instances created + /// to (re)build modules, so that once a module fails to build anywhere, + /// other instances will see that the module has failed and won't try to + /// build it again. + std::shared_ptr FailedModules; + + /// A prefix map for __FILE__ and __BASE_FILE__. + std::map> MacroPrefixMap; + + /// Contains the currently active skipped range mappings for skipping excluded + /// conditional directives. + /// + /// The pointer is passed to the Preprocessor when it's constructed. The + /// pointer is unowned, the client is responsible for its lifetime. + ExcludedPreprocessorDirectiveSkipMapping + *ExcludedConditionalDirectiveSkipMappings = nullptr; + + /// Set up preprocessor for RunAnalysis action. + bool SetUpStaticAnalyzer = false; + + /// Prevents intended crashes when using #pragma clang __debug. For testing. + bool DisablePragmaDebugCrash = false; + +public: + PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {} void addMacroDef(StringRef Name) { Macros.emplace_back(std::string(Name), false); diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index dfa4d7e1f2a12..66f22732e29cd 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -202,7 +202,8 @@ class Parser : public CodeCompletionHandler { std::unique_ptr UnrollAndJamHintHandler; std::unique_ptr NoUnrollAndJamHintHandler; std::unique_ptr FPHandler; - std::unique_ptr STDCFENVHandler; + std::unique_ptr STDCFenvAccessHandler; + std::unique_ptr STDCFenvRoundHandler; std::unique_ptr STDCCXLIMITHandler; std::unique_ptr STDCUnknownHandler; std::unique_ptr AttributePragmaHandler; @@ -745,6 +746,10 @@ class Parser : public CodeCompletionHandler { /// #pragma STDC FENV_ACCESS... void HandlePragmaFEnvAccess(); + /// Handle the annotation token produced for + /// #pragma STDC FENV_ROUND... 
+ void HandlePragmaFEnvRound(); + /// Handle the annotation token produced for /// #pragma float_control void HandlePragmaFloatControl(); diff --git a/clang/include/clang/Sema/CodeCompleteOptions.def b/clang/include/clang/Sema/CodeCompleteOptions.def deleted file mode 100644 index dab8027929e5e..0000000000000 --- a/clang/include/clang/Sema/CodeCompleteOptions.def +++ /dev/null @@ -1,51 +0,0 @@ -//===--- CodeCompleteOptions.def - FileSystem option database ----*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the CodeComplete options. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_CODE_COMPLETEOPT -#define TYPED_CODE_COMPLETEOPT(Type, Name, Description) -#endif - -#ifndef CODE_COMPLETEOPT -#define CODE_COMPLETEOPT(Name, Bits, Description) \ - TYPED_CODE_COMPLETEOPT(unsigned, Name, Description); -#endif - -CODE_COMPLETEOPT(IncludeMacros, 1, "Show macros in code completion results.") - -CODE_COMPLETEOPT(IncludeCodePatterns, 1, - "Show code patterns in code completion results.") - -CODE_COMPLETEOPT(IncludeGlobals, 1, - "Show top-level decls in code completion results.") - -CODE_COMPLETEOPT(IncludeNamespaceLevelDecls, 1, - "Show decls in namespace (including the global namespace) in " - "code completion results. If this is 0, `IncludeGlobals` will " - "be ignored. Currently, this only works when completing " - "qualified IDs (i.e. `Sema::CodeCompleteQualifiedId`). 
FIXME: " - "consider supporting more completion cases with this option.") - -CODE_COMPLETEOPT( - IncludeBriefComments, 1, - "Show brief documentation comments in code completion results.") - -CODE_COMPLETEOPT(LoadExternal, 1, - "Hint whether to load data from the external AST to provide " - "full results. If false, namespace-level declarations and " - "macros from the preamble may be omitted.") - -CODE_COMPLETEOPT(IncludeFixIts, 1, - "Include results after corrections (small fix-its), e.g. " - "change '.' to '->' on member access, etc.") - -#undef TYPED_CODE_COMPLETEOPT -#undef CODE_COMPLETEOPT \ No newline at end of file diff --git a/clang/include/clang/Sema/CodeCompleteOptions.h b/clang/include/clang/Sema/CodeCompleteOptions.h index 28cbc94fc84c2..a3403b01dcde9 100644 --- a/clang/include/clang/Sema/CodeCompleteOptions.h +++ b/clang/include/clang/Sema/CodeCompleteOptions.h @@ -14,14 +14,39 @@ namespace clang { /// Options controlling the behavior of code completion. class CodeCompleteOptions { public: -#define CODE_COMPLETEOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_CODE_COMPLETEOPT(Type, Name, Description) Type Name; -#include "clang/Sema/CodeCompleteOptions.def" + /// Show macros in code completion results. + unsigned IncludeMacros : 1; + + /// Show code patterns in code completion results. + unsigned IncludeCodePatterns : 1; + + /// Show top-level decls in code completion results. + unsigned IncludeGlobals : 1; + + /// Show decls in namespace (including the global namespace) in code + /// completion results. If this is 0, `IncludeGlobals` will be ignored. + /// + /// Currently, this only works when completing qualified IDs (i.e. + /// `Sema::CodeCompleteQualifiedId`). + /// FIXME: consider supporting more completion cases with this option. + unsigned IncludeNamespaceLevelDecls : 1; + + /// Show brief documentation comments in code completion results. 
+ unsigned IncludeBriefComments : 1; + + /// Hint whether to load data from the external AST to provide full results. + /// If false, namespace-level declarations and macros from the preamble may be + /// omitted. + unsigned LoadExternal : 1; + + /// Include results after corrections (small fix-its), e.g. change '.' to '->' + /// on member access, etc. + unsigned IncludeFixIts : 1; CodeCompleteOptions() : IncludeMacros(0), IncludeCodePatterns(0), IncludeGlobals(1), - IncludeNamespaceLevelDecls(1), IncludeBriefComments(0), LoadExternal(1), - IncludeFixIts(0) {} + IncludeNamespaceLevelDecls(1), IncludeBriefComments(0), + LoadExternal(1), IncludeFixIts(0) {} }; } // namespace clang diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9b25973ba77ec..2632a92f91764 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9904,7 +9904,7 @@ class Sema final { /// \#pragma STDC FENV_ACCESS void ActOnPragmaFEnvAccess(SourceLocation Loc, bool IsEnabled); - /// Called to set rounding mode for floating point operations. + /// Called to set constant rounding mode for floating point operations. void setRoundingMode(SourceLocation Loc, llvm::RoundingMode); /// Called to set exception behavior for floating point operations. 
@@ -11400,10 +11400,6 @@ class Sema final { ExprResult PerformImplicitConversion(Expr *From, QualType ToType, AssignmentAction Action, bool AllowExplicit = false); - ExprResult PerformImplicitConversion(Expr *From, QualType ToType, - AssignmentAction Action, - bool AllowExplicit, - ImplicitConversionSequence& ICS); ExprResult PerformImplicitConversion(Expr *From, QualType ToType, const ImplicitConversionSequence& ICS, AssignmentAction Action, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b6892e295ac7c..29c4f15e57b09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -900,8 +900,9 @@ class ASTReader /// Delete expressions to analyze at the end of translation unit. SmallVector DelayedDeleteExprs; - // A list of late parsed template function data. - SmallVector LateParsedTemplates; + // A list of late parsed template function data with their module files. + SmallVector>, 4> + LateParsedTemplates; /// The IDs of all decls to be checked for deferred diags. 
/// diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index a444843c50060..a61af45231348 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -349,7 +349,6 @@ let ParentPackage = APIModeling in { def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">, HelpText<"Improve modeling of the C standard library functions">, - Dependencies<[CallAndMessageModeling]>, CheckerOptions<[ CmdLineOption, "such as whether the parameter of isalpha is in the range [0, 255] " "or is EOF.">, Dependencies<[StdCLibraryFunctionsChecker]>, - WeakDependencies<[NonNullParamChecker]>, + WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>, Documentation; } // end "alpha.unix" diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index ff253ca15c0ea..f0359d2dbb3c2 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -6,10 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the analyzer options avaible with -analyzer-config, using -// the ANLAYZER_OPTION and ANALYZER_OPTION_DEPENDS_ON_USER_MODE macros. -// Other analyzer options use the simpler ANALYZEROPT and TYPED_ANALYZEROPT -// macro. +// This file defines the analyzer options avaible with -analyzer-config. // //===----------------------------------------------------------------------===// @@ -32,15 +29,6 @@ define both 'ANALYZER_OPTION' and 'ANALYZER_OPTION_DEPENDS_ON_USER_MODE' macros! 
#endif #endif -#ifndef TYPED_ANALYZEROPT -#define TYPED_ANALYZEROPT(TYPE, NAME, DESCRIPTION) -#endif - -#ifndef ANALYZEROPT -#define ANALYZEROPT(NAME, BITS, DESCRIPTION) \ - TYPED_ANALYZEROPT(unsigned, NAME, DESCRITPTION) -#endif - #ifndef ANALYZER_OPTION /// Create a new analyzer option, but dont generate a method for it in /// AnalyzerOptions. @@ -54,8 +42,7 @@ define both 'ANALYZER_OPTION' and 'ANALYZER_OPTION_DEPENDS_ON_USER_MODE' macros! /// (-analyzer-config CMDFLAG=VALUE) /// DESC - Description of the flag. /// DEFAULT_VAL - The default value for CMDFLAG. -#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ - TYPED_ANALYZEROPT(TYPE, NAME, DESC) +#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) #endif #ifndef ANALYZER_OPTION_DEPENDS_ON_USER_MODE @@ -75,8 +62,7 @@ define both 'ANALYZER_OPTION' and 'ANALYZER_OPTION_DEPENDS_ON_USER_MODE' macros! /// DEEP_VAL - The default value for CMDFLAG, when "user-mode" was set to /// "deep". #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ - SHALLOW_VAL, DEEP_VAL) \ - TYPED_ANALYZEROPT(TYPE, NAME, DESC) + SHALLOW_VAL, DEEP_VAL) #endif //===----------------------------------------------------------------------===// @@ -449,79 +435,5 @@ ANALYZER_OPTION_DEPENDS_ON_USER_MODE( "\"basic-inlining\", \"inlining\", \"dynamic\", \"dynamic-bifurcate\".", /* SHALLOW_VAL */ "inlining", /* DEEP_VAL */ "dynamic-bifurcate") -//===----------------------------------------------------------------------===// -// Other analyzer options. 
-//===----------------------------------------------------------------------===// - -TYPED_ANALYZEROPT(CheckersAndPackagesTy, CheckersAndPackages, - "Pairs of checker/package name and enable/disable.") - -TYPED_ANALYZEROPT( - std::vector, SilencedCheckersAndPackages, - "Vector of checker/package names which will not emit warnings.") - -TYPED_ANALYZEROPT(ConfigTable, Config, - "A key-value table of use-specified configuration values.") -TYPED_ANALYZEROPT(AnalysisStores, AnalysisStoreOpt, "") -TYPED_ANALYZEROPT(AnalysisConstraints, AnalysisConstraintsOpt, "") -TYPED_ANALYZEROPT(AnalysisDiagClients, AnalysisDiagOpt, "") -TYPED_ANALYZEROPT(AnalysisPurgeMode, AnalysisPurgeOpt, "") - -TYPED_ANALYZEROPT(std::string, AnalyzeSpecificFunction, "") - -TYPED_ANALYZEROPT(std::string, DumpExplodedGraphTo, - "File path to which the exploded graph should be dumped.") - -TYPED_ANALYZEROPT(std::string, FullCompilerInvocation, - "Store full compiler invocation for reproducible " - "instructions in the generated report.") - -TYPED_ANALYZEROPT(unsigned, maxBlockVisitOnPath, - "The maximum number of times the analyzer visits a block.") - -ANALYZEROPT( - DisableAllCheckers, 1, - "Disable all analyzer checkers. This flag allows one to disable analyzer " - "checkers on the code processed by the given analysis consumer. 
Note, the " - "code will get parsed and the command-line options will get checked.") - -ANALYZEROPT(ShowCheckerHelp, 1, "") -ANALYZEROPT(ShowCheckerHelpAlpha, 1, "") -ANALYZEROPT(ShowCheckerHelpDeveloper, 1, "") - -ANALYZEROPT(ShowCheckerOptionList, 1, "") -ANALYZEROPT(ShowCheckerOptionAlphaList, 1, "") -ANALYZEROPT(ShowCheckerOptionDeveloperList, 1, "") - -ANALYZEROPT(ShowEnabledCheckerList, 1, "") -ANALYZEROPT(ShowConfigOptionsList, 1, "") -ANALYZEROPT(ShouldEmitErrorsOnInvalidConfigValue, 1, "") -ANALYZEROPT(AnalyzeAll, 1, "") -ANALYZEROPT(AnalyzerDisplayProgress, 1, "") -ANALYZEROPT(AnalyzeNestedBlocks, 1, "") - -ANALYZEROPT(eagerlyAssumeBinOpBifurcation, 1, "") - -ANALYZEROPT(TrimGraph, 1, "") -ANALYZEROPT(visualizeExplodedGraphWithGraphViz, 1, "") -ANALYZEROPT(UnoptimizedCFG, 1, "") -ANALYZEROPT(PrintStats, 1, "") - -ANALYZEROPT( - NoRetryExhausted, 1, - "Do not re-analyze paths leading to exhausted nodes with a different " - "strategy. We get better code coverage when retry is enabled.") - -ANALYZEROPT(AnalyzerWerror, 1, "Emit analyzer warnings as errors.") - -TYPED_ANALYZEROPT(unsigned, InlineMaxStackDepth, - "The inlining stack depth limit. 
Cap the stack depth at 4 " - "calls (5 stack frames, base + 4 calls).") - -TYPED_ANALYZEROPT(AnalysisInliningMode, InliningMode, - "The mode of function selection used during inlining.") - #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE #undef ANALYZER_OPTION -#undef TYPED_ANALYZEROPT -#undef ANALYZEROPT diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h index 8d81f90294174..4907b0757a8a4 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h @@ -162,7 +162,6 @@ enum UserModeKind { class AnalyzerOptions : public RefCountedBase { public: using ConfigTable = llvm::StringMap; - using CheckersAndPackagesTy = std::vector>; /// Retrieves the list of checkers generated from Checkers.td. This doesn't /// contain statically linked but non-generated checkers and plugin checkers! @@ -196,9 +195,86 @@ class AnalyzerOptions : public RefCountedBase { size_t InitialPad, size_t EntryWidth, size_t MinLineWidth = 0); -#define ANALYZEROPT(NAME, BITS, DESCRIPTION) unsigned NAME : BITS; -#define TYPED_ANALYZEROPT(TYPE, NAME, DESCRIPTION) TYPE NAME; + /// Pairs of checker/package name and enable/disable. + std::vector> CheckersAndPackages; + + /// Vector of checker/package names which will not emit warnings. + std::vector SilencedCheckersAndPackages; + + /// A key-value table of use-specified configuration values. + // TODO: This shouldn't be public. + ConfigTable Config; + AnalysisStores AnalysisStoreOpt = RegionStoreModel; + AnalysisConstraints AnalysisConstraintsOpt = RangeConstraintsModel; + AnalysisDiagClients AnalysisDiagOpt = PD_HTML; + AnalysisPurgeMode AnalysisPurgeOpt = PurgeStmt; + + std::string AnalyzeSpecificFunction; + + /// File path to which the exploded graph should be dumped. + std::string DumpExplodedGraphTo; + + /// Store full compiler invocation for reproducible instructions in the + /// generated report. 
+ std::string FullCompilerInvocation; + + /// The maximum number of times the analyzer visits a block. + unsigned maxBlockVisitOnPath; + + /// Disable all analyzer checkers. + /// + /// This flag allows one to disable analyzer checkers on the code processed by + /// the given analysis consumer. Note, the code will get parsed and the + /// command-line options will get checked. + unsigned DisableAllCheckers : 1; + + unsigned ShowCheckerHelp : 1; + unsigned ShowCheckerHelpAlpha : 1; + unsigned ShowCheckerHelpDeveloper : 1; + + unsigned ShowCheckerOptionList : 1; + unsigned ShowCheckerOptionAlphaList : 1; + unsigned ShowCheckerOptionDeveloperList : 1; + + unsigned ShowEnabledCheckerList : 1; + unsigned ShowConfigOptionsList : 1; + unsigned ShouldEmitErrorsOnInvalidConfigValue : 1; + unsigned AnalyzeAll : 1; + unsigned AnalyzerDisplayProgress : 1; + unsigned AnalyzeNestedBlocks : 1; + + unsigned eagerlyAssumeBinOpBifurcation : 1; + + unsigned TrimGraph : 1; + unsigned visualizeExplodedGraphWithGraphViz : 1; + unsigned UnoptimizedCFG : 1; + unsigned PrintStats : 1; + + /// Do not re-analyze paths leading to exhausted nodes with a different + /// strategy. We get better code coverage when retry is enabled. + unsigned NoRetryExhausted : 1; + + /// Emit analyzer warnings as errors. + unsigned AnalyzerWerror : 1; + + /// The inlining stack depth limit. + // Cap the stack depth at 4 calls (5 stack frames, base + 4 calls). + unsigned InlineMaxStackDepth = 5; + + /// The mode of function selection used during inlining. + AnalysisInliningMode InliningMode = NoRedundancy; + + // Create a field for each -analyzer-config option. 
+#define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ + SHALLOW_VAL, DEEP_VAL) \ + ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, SHALLOW_VAL) + +#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ + TYPE NAME; + #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def" +#undef ANALYZER_OPTION +#undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE // Create an array of all -analyzer-config command line options. Sort it in // the constructor. @@ -223,19 +299,15 @@ class AnalyzerOptions : public RefCountedBase { } AnalyzerOptions() - : AnalysisStoreOpt(RegionStoreModel), - AnalysisConstraintsOpt(RangeConstraintsModel), AnalysisDiagOpt(PD_HTML), - AnalysisPurgeOpt(PurgeStmt), DisableAllCheckers(false), - ShowCheckerHelp(false), ShowCheckerHelpAlpha(false), - ShowCheckerHelpDeveloper(false), ShowCheckerOptionList(false), - ShowCheckerOptionAlphaList(false), + : DisableAllCheckers(false), ShowCheckerHelp(false), + ShowCheckerHelpAlpha(false), ShowCheckerHelpDeveloper(false), + ShowCheckerOptionList(false), ShowCheckerOptionAlphaList(false), ShowCheckerOptionDeveloperList(false), ShowEnabledCheckerList(false), ShowConfigOptionsList(false), AnalyzeAll(false), AnalyzerDisplayProgress(false), AnalyzeNestedBlocks(false), eagerlyAssumeBinOpBifurcation(false), TrimGraph(false), visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false), - PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false), - InlineMaxStackDepth(5), InliningMode(NoRedundancy) { + PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false) { llvm::sort(AnalyzerConfigCmdFlags); } diff --git a/clang/include/clang/Tooling/Transformer/RewriteRule.h b/clang/include/clang/Tooling/Transformer/RewriteRule.h index 9700d1ff539de..4bdcc8d5c3296 100644 --- a/clang/include/clang/Tooling/Transformer/RewriteRule.h +++ b/clang/include/clang/Tooling/Transformer/RewriteRule.h @@ -380,6 +380,38 @@ EditGenerator rewriteDescendants(std::string NodeId, RewriteRule Rule); // RewriteRule 
API. Recast them as such. Or, just declare these functions // public and well-supported and move them out of `detail`. namespace detail { +/// The following overload set is a version of `rewriteDescendants` that +/// operates directly on the AST, rather than generating a Transformer +/// combinator. It applies `Rule` to all descendants of `Node`, although not +/// `Node` itself. `Rule` can refer to nodes bound in `Result`. +/// +/// For example, assuming that "body" is bound to a function body in MatchResult +/// `Results`, this will produce edits to change all appearances of `x` in that +/// body to `3`. +/// ``` +/// auto InlineX = +/// makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); +/// const auto *Node = Results.Nodes.getNodeAs("body"); +/// auto Edits = rewriteDescendants(*Node, InlineX, Results); +/// ``` +/// @{ +llvm::Expected> +rewriteDescendants(const Decl &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); + +llvm::Expected> +rewriteDescendants(const Stmt &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); + +llvm::Expected> +rewriteDescendants(const TypeLoc &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); + +llvm::Expected> +rewriteDescendants(const DynTypedNode &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); +/// @} + /// Builds a single matcher for the rule, covering all of the rule's cases. /// Only supports Rules whose cases' matchers share the same base "kind" /// (`Stmt`, `Decl`, etc.) 
Deprecated: use `buildMatchers` instead, which diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 2a8834b4db0cb..08ae0ff3c67d3 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -304,6 +304,25 @@ APValue::APValue(const APValue &RHS) : Kind(None) { } } +APValue::APValue(APValue &&RHS) : Kind(RHS.Kind), Data(RHS.Data) { + RHS.Kind = None; +} + +APValue &APValue::operator=(const APValue &RHS) { + if (this != &RHS) + *this = APValue(RHS); + return *this; +} + +APValue &APValue::operator=(APValue &&RHS) { + if (Kind != None && Kind != Indeterminate) + DestroyDataAndMakeUninit(); + Kind = RHS.Kind; + Data = RHS.Data; + RHS.Kind = None; + return *this; +} + void APValue::DestroyDataAndMakeUninit() { if (Kind == Int) ((APSInt*)(char*)Data.buffer)->~APSInt(); @@ -372,10 +391,7 @@ bool APValue::needsCleanup() const { void APValue::swap(APValue &RHS) { std::swap(Kind, RHS.Kind); - char TmpData[DataSize]; - memcpy(TmpData, Data.buffer, DataSize); - memcpy(Data.buffer, RHS.Data.buffer, DataSize); - memcpy(RHS.Data.buffer, TmpData, DataSize); + std::swap(Data, RHS.Data); } static double GetApproxValue(const llvm::APFloat &F) { diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index 35099fd0dacf8..dfd26fd97bc6d 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -55,6 +55,7 @@ add_clang_library(clangAST ExternalASTMerger.cpp ExternalASTSource.cpp FormatString.cpp + IgnoreExpr.cpp InheritViz.cpp Interp/ByteCodeEmitter.cpp Interp/ByteCodeExprGen.cpp diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index b1a8e00f272f2..2a7017635b08c 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1299,7 +1299,8 @@ LinkageInfo LinkageComputer::getLVForLocalDecl(const NamedDecl *D, // we should not make static local variables in the function hidden. 
LV = getLVForDecl(FD, computation); if (isa(D) && useInlineVisibilityHidden(FD) && - !LV.isVisibilityExplicit()) { + !LV.isVisibilityExplicit() && + !Context.getLangOpts().VisibilityInlinesHiddenStaticLocalVar) { assert(cast(D)->isStaticLocal()); // If this was an implicitly hidden inline method, check again for // explicit visibility on the parent class, and use that for static locals diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 8efd6837c541b..15f3df0fd2168 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DependenceFlags.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/IgnoreExpr.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" @@ -39,7 +40,7 @@ using namespace clang; const Expr *Expr::getBestDynamicClassTypeExpr() const { const Expr *E = this; while (true) { - E = E->ignoreParenBaseCasts(); + E = E->IgnoreParenBaseCasts(); // Follow the RHS of a comma operator. if (auto *BO = dyn_cast(E)) { @@ -2779,162 +2780,8 @@ QualType Expr::findBoundMemberType(const Expr *expr) { return QualType(); } -static Expr *IgnoreImpCastsSingleStep(Expr *E) { - if (auto *ICE = dyn_cast(E)) - return ICE->getSubExpr(); - - if (auto *FE = dyn_cast(E)) - return FE->getSubExpr(); - - return E; -} - -static Expr *IgnoreImpCastsExtraSingleStep(Expr *E) { - // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in - // addition to what IgnoreImpCasts() skips to account for the current - // behaviour of IgnoreParenImpCasts(). 
- Expr *SubE = IgnoreImpCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -static Expr *IgnoreCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast(E)) - return CE->getSubExpr(); - - if (auto *FE = dyn_cast(E)) - return FE->getSubExpr(); - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -static Expr *IgnoreLValueCastsSingleStep(Expr *E) { - // Skip what IgnoreCastsSingleStep skips, except that only - // lvalue-to-rvalue casts are skipped. - if (auto *CE = dyn_cast(E)) - if (CE->getCastKind() != CK_LValueToRValue) - return E; - - return IgnoreCastsSingleStep(E); -} - -static Expr *IgnoreBaseCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast(E)) - if (CE->getCastKind() == CK_DerivedToBase || - CE->getCastKind() == CK_UncheckedDerivedToBase || - CE->getCastKind() == CK_NoOp) - return CE->getSubExpr(); - - return E; -} - -static Expr *IgnoreImplicitSingleStep(Expr *E) { - Expr *SubE = IgnoreImpCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *BTE = dyn_cast(E)) - return BTE->getSubExpr(); - - return E; -} - -static Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E) { - if (auto *ICE = dyn_cast(E)) - return ICE->getSubExprAsWritten(); - - return IgnoreImplicitSingleStep(E); -} - -static Expr *IgnoreParensOnlySingleStep(Expr *E) { - if (auto *PE = dyn_cast(E)) - return PE->getSubExpr(); - return E; -} - -static Expr *IgnoreParensSingleStep(Expr *E) { - if (auto *PE = dyn_cast(E)) - return PE->getSubExpr(); - - if (auto *UO = dyn_cast(E)) { - if (UO->getOpcode() == UO_Extension) - return UO->getSubExpr(); - } - - else if (auto *GSE = dyn_cast(E)) { - if (!GSE->isResultDependent()) - return GSE->getResultExpr(); - } - - else if (auto *CE = dyn_cast(E)) { - 
if (!CE->isConditionDependent()) - return CE->getChosenSubExpr(); - } - - return E; -} - -static Expr *IgnoreNoopCastsSingleStep(const ASTContext &Ctx, Expr *E) { - if (auto *CE = dyn_cast(E)) { - // We ignore integer <-> casts that are of the same width, ptr<->ptr and - // ptr<->int casts of the same width. We also ignore all identity casts. - Expr *SubExpr = CE->getSubExpr(); - bool IsIdentityCast = - Ctx.hasSameUnqualifiedType(E->getType(), SubExpr->getType()); - bool IsSameWidthCast = - (E->getType()->isPointerType() || E->getType()->isIntegralType(Ctx)) && - (SubExpr->getType()->isPointerType() || - SubExpr->getType()->isIntegralType(Ctx)) && - (Ctx.getTypeSize(E->getType()) == Ctx.getTypeSize(SubExpr->getType())); - - if (IsIdentityCast || IsSameWidthCast) - return SubExpr; - } - - else if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -static Expr *IgnoreExprNodesImpl(Expr *E) { return E; } -template -static Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) { - return IgnoreExprNodesImpl(Fn(E), std::forward(Fns)...); -} - -/// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, -/// Recursively apply each of the functions to E until reaching a fixed point. -/// Note that a null E is valid; in this case nothing is done. -template -static Expr *IgnoreExprNodes(Expr *E, FnTys &&... 
Fns) { - Expr *LastE = nullptr; - while (E != LastE) { - LastE = E; - E = IgnoreExprNodesImpl(E, std::forward(Fns)...); - } - return E; -} - Expr *Expr::IgnoreImpCasts() { - return IgnoreExprNodes(this, IgnoreImpCastsSingleStep); + return IgnoreExprNodes(this, IgnoreImplicitCastsSingleStep); } Expr *Expr::IgnoreCasts() { @@ -2955,14 +2802,14 @@ Expr *Expr::IgnoreParens() { Expr *Expr::IgnoreParenImpCasts() { return IgnoreExprNodes(this, IgnoreParensSingleStep, - IgnoreImpCastsExtraSingleStep); + IgnoreImplicitCastsExtraSingleStep); } Expr *Expr::IgnoreParenCasts() { return IgnoreExprNodes(this, IgnoreParensSingleStep, IgnoreCastsSingleStep); } -Expr *Expr::IgnoreConversionOperator() { +Expr *Expr::IgnoreConversionOperatorSingleStep() { if (auto *MCE = dyn_cast(this)) { if (MCE->getMethodDecl() && isa(MCE->getMethodDecl())) return MCE->getImplicitObjectArgument(); @@ -2975,58 +2822,72 @@ Expr *Expr::IgnoreParenLValueCasts() { IgnoreLValueCastsSingleStep); } -Expr *Expr::ignoreParenBaseCasts() { +Expr *Expr::IgnoreParenBaseCasts() { return IgnoreExprNodes(this, IgnoreParensSingleStep, IgnoreBaseCastsSingleStep); } Expr *Expr::IgnoreParenNoopCasts(const ASTContext &Ctx) { - return IgnoreExprNodes(this, IgnoreParensSingleStep, [&Ctx](Expr *E) { - return IgnoreNoopCastsSingleStep(Ctx, E); - }); + auto IgnoreNoopCastsSingleStep = [&Ctx](Expr *E) { + if (auto *CE = dyn_cast(E)) { + // We ignore integer <-> casts that are of the same width, ptr<->ptr and + // ptr<->int casts of the same width. We also ignore all identity casts. 
+ Expr *SubExpr = CE->getSubExpr(); + bool IsIdentityCast = + Ctx.hasSameUnqualifiedType(E->getType(), SubExpr->getType()); + bool IsSameWidthCast = (E->getType()->isPointerType() || + E->getType()->isIntegralType(Ctx)) && + (SubExpr->getType()->isPointerType() || + SubExpr->getType()->isIntegralType(Ctx)) && + (Ctx.getTypeSize(E->getType()) == + Ctx.getTypeSize(SubExpr->getType())); + + if (IsIdentityCast || IsSameWidthCast) + return SubExpr; + } else if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; + }; + return IgnoreExprNodes(this, IgnoreParensSingleStep, + IgnoreNoopCastsSingleStep); } Expr *Expr::IgnoreUnlessSpelledInSource() { - Expr *E = this; - - Expr *LastE = nullptr; - while (E != LastE) { - LastE = E; - E = IgnoreExprNodes(E, IgnoreImplicitSingleStep, - IgnoreImpCastsExtraSingleStep, - IgnoreParensOnlySingleStep); - - auto SR = E->getSourceRange(); - + auto IgnoreImplicitConstructorSingleStep = [](Expr *E) { if (auto *C = dyn_cast(E)) { auto NumArgs = C->getNumArgs(); if (NumArgs == 1 || (NumArgs > 1 && isa(C->getArg(1)))) { Expr *A = C->getArg(0); - if (A->getSourceRange() == SR || !isa(C)) - E = A; + if (A->getSourceRange() == E->getSourceRange() || + !isa(C)) + return A; } } - + return E; + }; + auto IgnoreImplicitMemberCallSingleStep = [](Expr *E) { if (auto *C = dyn_cast(E)) { Expr *ExprNode = C->getImplicitObjectArgument(); - if (ExprNode->getSourceRange() == SR) { - E = ExprNode; - continue; + if (ExprNode->getSourceRange() == E->getSourceRange()) { + return ExprNode; } if (auto *PE = dyn_cast(ExprNode)) { if (PE->getSourceRange() == C->getSourceRange()) { - E = PE; - continue; + return cast(PE); } } ExprNode = ExprNode->IgnoreParenImpCasts(); - if (ExprNode->getSourceRange() == SR) - E = ExprNode; + if (ExprNode->getSourceRange() == E->getSourceRange()) + return ExprNode; } - } - - return E; + return E; + }; + return IgnoreExprNodes( + this, IgnoreImplicitSingleStep, IgnoreImplicitCastsExtraSingleStep, + 
IgnoreParensOnlySingleStep, IgnoreImplicitConstructorSingleStep, + IgnoreImplicitMemberCallSingleStep); } bool Expr::isDefaultArgument() const { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1479e87f2a0df..b6083fdc16fcf 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -6627,9 +6627,15 @@ class APValueToBufferConverter { } bool visitInt(const APSInt &Val, QualType Ty, CharUnits Offset) { - CharUnits Width = Info.Ctx.getTypeSizeInChars(Ty); - SmallVector Bytes(Width.getQuantity()); - llvm::StoreIntToMemory(Val, &*Bytes.begin(), Width.getQuantity()); + APSInt AdjustedVal = Val; + unsigned Width = AdjustedVal.getBitWidth(); + if (Ty->isBooleanType()) { + Width = Info.Ctx.getTypeSize(Ty); + AdjustedVal = AdjustedVal.extend(Width); + } + + SmallVector Bytes(Width / 8); + llvm::StoreIntToMemory(AdjustedVal, &*Bytes.begin(), Width / 8); Buffer.writeObject(Offset, Bytes); return true; } @@ -6670,6 +6676,13 @@ class BufferToAPValueConverter { return None; } + llvm::NoneType unrepresentableValue(QualType Ty, const APSInt &Val) { + Info.FFDiag(BCE->getBeginLoc(), + diag::note_constexpr_bit_cast_unrepresentable_value) + << Ty << Val.toString(/*Radix=*/10); + return None; + } + Optional visit(const BuiltinType *T, CharUnits Offset, const EnumType *EnumSugar = nullptr) { if (T->isNullPtrType()) { @@ -6680,6 +6693,20 @@ class BufferToAPValueConverter { } CharUnits SizeOf = Info.Ctx.getTypeSizeInChars(T); + + // Work around floating point types that contain unused padding bytes. This + // is really just `long double` on x86, which is the only fundamental type + // with padding bytes. 
+ if (T->isRealFloatingType()) { + const llvm::fltSemantics &Semantics = + Info.Ctx.getFloatTypeSemantics(QualType(T, 0)); + unsigned NumBits = llvm::APFloatBase::getSizeInBits(Semantics); + assert(NumBits % 8 == 0); + CharUnits NumBytes = CharUnits::fromQuantity(NumBits / 8); + if (NumBytes != SizeOf) + SizeOf = NumBytes; + } + SmallVector Bytes; if (!Buffer.readObject(Offset, SizeOf, Bytes)) { // If this is std::byte or unsigned char, then its okay to store an @@ -6704,6 +6731,15 @@ class BufferToAPValueConverter { if (T->isIntegralOrEnumerationType()) { Val.setIsSigned(T->isSignedIntegerOrEnumerationType()); + + unsigned IntWidth = Info.Ctx.getIntWidth(QualType(T, 0)); + if (IntWidth != Val.getBitWidth()) { + APSInt Truncated = Val.trunc(IntWidth); + if (Truncated.extend(Val.getBitWidth()) != Val) + return unrepresentableValue(QualType(T, 0), Val); + Val = Truncated; + } + return APValue(Val); } diff --git a/clang/lib/AST/IgnoreExpr.cpp b/clang/lib/AST/IgnoreExpr.cpp new file mode 100644 index 0000000000000..65aaaeb6a1ed0 --- /dev/null +++ b/clang/lib/AST/IgnoreExpr.cpp @@ -0,0 +1,129 @@ +//===--- IgnoreExpr.cpp - Ignore intermediate Expressions -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements common functions to ignore intermediate expression nodes +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/IgnoreExpr.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" + +using namespace clang; + +Expr *clang::IgnoreImplicitCastsSingleStep(Expr *E) { + if (auto *ICE = dyn_cast(E)) + return ICE->getSubExpr(); + + if (auto *FE = dyn_cast(E)) + return FE->getSubExpr(); + + return E; +} + +Expr *clang::IgnoreImplicitCastsExtraSingleStep(Expr *E) { + // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in + // addition to what IgnoreImpCasts() skips to account for the current + // behaviour of IgnoreParenImpCasts(). + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; +} + +Expr *clang::IgnoreCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast(E)) + return CE->getSubExpr(); + + if (auto *FE = dyn_cast(E)) + return FE->getSubExpr(); + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; +} + +Expr *clang::IgnoreLValueCastsSingleStep(Expr *E) { + // Skip what IgnoreCastsSingleStep skips, except that only + // lvalue-to-rvalue casts are skipped. 
+ if (auto *CE = dyn_cast(E)) + if (CE->getCastKind() != CK_LValueToRValue) + return E; + + return IgnoreCastsSingleStep(E); +} + +Expr *clang::IgnoreBaseCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast(E)) + if (CE->getCastKind() == CK_DerivedToBase || + CE->getCastKind() == CK_UncheckedDerivedToBase || + CE->getCastKind() == CK_NoOp) + return CE->getSubExpr(); + + return E; +} + +Expr *clang::IgnoreImplicitSingleStep(Expr *E) { + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *BTE = dyn_cast(E)) + return BTE->getSubExpr(); + + return E; +} + +Expr *clang::IgnoreImplicitAsWrittenSingleStep(Expr *E) { + if (auto *ICE = dyn_cast(E)) + return ICE->getSubExprAsWritten(); + + return IgnoreImplicitSingleStep(E); +} + +Expr *clang::IgnoreParensOnlySingleStep(Expr *E) { + if (auto *PE = dyn_cast(E)) + return PE->getSubExpr(); + return E; +} + +Expr *clang::IgnoreParensSingleStep(Expr *E) { + if (auto *PE = dyn_cast(E)) + return PE->getSubExpr(); + + if (auto *UO = dyn_cast(E)) { + if (UO->getOpcode() == UO_Extension) + return UO->getSubExpr(); + } + + else if (auto *GSE = dyn_cast(E)) { + if (!GSE->isResultDependent()) + return GSE->getResultExpr(); + } + + else if (auto *CE = dyn_cast(E)) { + if (!CE->isConditionDependent()) + return CE->getChosenSubExpr(); + } + + return E; +} diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 64e0da9e64b12..5b97265a6d8ae 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -1266,13 +1266,21 @@ ClassifyDiagnostic(const AttrTy *A) { } bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { - if (!CurrentMethod) + const threadSafety::til::SExpr *SExp = CapE.sexpr(); + assert(SExp && "Null expressions should be ignored"); + + // Global variables are always in scope. 
+ if (isa(SExp)) + return true; + + // Members are in scope from methods of the same class. + if (const auto *P = dyn_cast(SExp)) { + if (!CurrentMethod) return false; - if (const auto *P = dyn_cast_or_null(CapE.sexpr())) { - const auto *VD = P->clangDecl(); - if (VD) - return VD->getDeclContext() == CurrentMethod->getDeclContext(); + const ValueDecl *VD = P->clangDecl(); + return VD->getDeclContext() == CurrentMethod->getDeclContext(); } + return false; } diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index 1b8c55e56d470..aee9185760071 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -274,7 +274,7 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, const auto *VD = cast(DRE->getDecl()->getCanonicalDecl()); // Function parameters require substitution and/or renaming. - if (const auto *PV = dyn_cast_or_null(VD)) { + if (const auto *PV = dyn_cast(VD)) { unsigned I = PV->getFunctionScopeIndex(); const DeclContext *D = PV->getDeclContext(); if (Ctx && Ctx->FunArgs) { diff --git a/clang/lib/Basic/CodeGenOptions.cpp b/clang/lib/Basic/CodeGenOptions.cpp index 9e04b5ced2bb3..4fc7a535c9eb9 100644 --- a/clang/lib/Basic/CodeGenOptions.cpp +++ b/clang/lib/Basic/CodeGenOptions.cpp @@ -10,9 +10,8 @@ #include namespace clang { -CodeGenOptions::CodeGenOptions() - : FPDenormalMode(llvm::DenormalMode::getIEEE()), - FP32DenormalMode(llvm::DenormalMode::getIEEE()), Argv0(nullptr) { + +CodeGenOptions::CodeGenOptions() { #define CODEGENOPT(Name, Bits, Default) Name = Default; #define ENUM_CODEGENOPT(Name, Type, Bits, Default) set##Name(Default); #include "clang/Basic/CodeGenOptions.def" diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 344d326a92e48..c08670c87fb69 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -14,8 +14,7 @@ using namespace clang; -LangOptions::LangOptions() - : 
CFRuntime(CoreFoundationABI::Unspecified), IsHeaderFile(false) { +LangOptions::LangOptions() { #define LANGOPT(Name, Bits, Default, Description) Name = Default; #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) set##Name(Default); #include "clang/Basic/LangOptions.def" diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 6fd97d4e57869..7f0a0f0d86dc1 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -378,8 +378,7 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); if (Opts.ArmSveVectorBits) - Builder.defineMacro("__ARM_FEATURE_SVE_BITS_EXPERIMENTAL", - Twine(Opts.ArmSveVectorBits)); + Builder.defineMacro("__ARM_FEATURE_SVE_BITS", Twine(Opts.ArmSveVectorBits)); } ArrayRef AArch64TargetInfo::getTargetBuiltins() const { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ddeec25d8cf5a..ff9dcd5022029 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11320,16 +11320,6 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, return EmitX86Select(CGF, Ops[2], Res, Ops[1]); } -static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef Ops) { - - llvm::Type *Ty = Ops[0]->getType(); - Value *Zero = llvm::Constant::getNullValue(Ty); - Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); - Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); - Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); - return Res; -} - static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, ArrayRef Ops) { Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); @@ -11549,13 +11539,9 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } -// Emit addition or subtraction with signed/unsigned saturation. 
-static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, - ArrayRef Ops, bool IsSigned, - bool IsAddition) { - Intrinsic::ID IID = - IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) - : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); +// Emit binary intrinsic with the same type used in result/args. +static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF, + ArrayRef Ops, Intrinsic::ID IID) { llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); } @@ -13310,9 +13296,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pabsb512: case X86::BI__builtin_ia32_pabsw512: case X86::BI__builtin_ia32_pabsd512: - case X86::BI__builtin_ia32_pabsq512: - return EmitX86Abs(*this, Ops); - + case X86::BI__builtin_ia32_pabsq512: { + Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: @@ -14039,28 +14026,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_paddsw256: case X86::BI__builtin_ia32_paddsb128: case X86::BI__builtin_ia32_paddsw128: - return EmitX86AddSubSatExpr(*this, Ops, true, true); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat); case X86::BI__builtin_ia32_paddusb512: case X86::BI__builtin_ia32_paddusw512: case X86::BI__builtin_ia32_paddusb256: case X86::BI__builtin_ia32_paddusw256: case X86::BI__builtin_ia32_paddusb128: case X86::BI__builtin_ia32_paddusw128: - return EmitX86AddSubSatExpr(*this, Ops, false, true); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat); case X86::BI__builtin_ia32_psubsb512: case X86::BI__builtin_ia32_psubsw512: case X86::BI__builtin_ia32_psubsb256: case X86::BI__builtin_ia32_psubsw256: case X86::BI__builtin_ia32_psubsb128: case 
X86::BI__builtin_ia32_psubsw128: - return EmitX86AddSubSatExpr(*this, Ops, true, false); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat); case X86::BI__builtin_ia32_psubusb512: case X86::BI__builtin_ia32_psubusw512: case X86::BI__builtin_ia32_psubusb256: case X86::BI__builtin_ia32_psubusw256: case X86::BI__builtin_ia32_psubusb128: case X86::BI__builtin_ia32_psubusw128: - return EmitX86AddSubSatExpr(*this, Ops, false, false); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); } } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 8c3ed69d79b73..93ef2e42a1410 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1038,6 +1038,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, uint64_t Size = 0; uint32_t Align = 0; + const RecordDecl *D = RD->getDefinition(); + if (D && D->isCompleteDefinition()) + Size = CGM.getContext().getTypeSize(Ty); + llvm::DINode::DIFlags Flags = llvm::DINode::FlagFwdDecl; // Add flag to nontrivial forward declarations. 
To be consistent with MSVC, diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index d0e0c7d6c0603..50b6079bd80bf 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -220,7 +220,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); assert(DevirtualizedMethod); const CXXRecordDecl *DevirtualizedClass = DevirtualizedMethod->getParent(); - const Expr *Inner = Base->ignoreParenBaseCasts(); + const Expr *Inner = Base->IgnoreParenBaseCasts(); if (DevirtualizedMethod->getReturnType().getCanonicalType() != MD->getReturnType().getCanonicalType()) // If the return types are not the same, this might be a case where more diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 257343f678388..01bae3b04699c 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2048,7 +2048,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { - assert(!GV->isDeclaration() && + assert((isa(GV) || !GV->isDeclaration()) && "Only globals with definition can force usage."); LLVMUsed.emplace_back(GV); } diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 50a0523a495a4..3ca5ca2ffb4c6 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9066,13 +9066,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( assert(Max == 0 && "Max must be zero"); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by - // --gpu-max-threads-per-block=n or its default value for HIP. - const unsigned OpenCLDefaultMaxWorkGroupSize = 256; - const unsigned DefaultMaxWorkGroupSize = - IsOpenCLKernel ? 
OpenCLDefaultMaxWorkGroupSize - : M.getLangOpts().GPUMaxThreadsPerBlock; + // --gpu-max-threads-per-block=n or its default value. std::string AttrVal = - std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize); + std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index cce0eb557a9c6..0f51443010ca4 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -866,8 +866,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, LinkCXXRuntimes) || D.CCCIsCXX(); - NeedsHeapProfRt = - Args.hasFlag(options::OPT_fmemprof, options::OPT_fno_memprof, false); + NeedsHeapProfRt = Args.hasFlag(options::OPT_fmemory_profile, + options::OPT_fno_memory_profile, false); // Finally, initialize the set of available and recoverable sanitizers. Sanitizers.Mask |= Kinds; diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 043b7f257c01d..70ba8eb2a7d0d 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -21,12 +21,19 @@ using namespace llvm::opt; const char *sparc::getSparcAsmModeForCPU(StringRef Name, const llvm::Triple &Triple) { if (Triple.getArch() == llvm::Triple::sparcv9) { + const char *DefV9CPU; + + if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()) + DefV9CPU = "-Av9a"; + else + DefV9CPU = "-Av9"; + return llvm::StringSwitch(Name) .Case("niagara", "-Av9b") .Case("niagara2", "-Av9b") .Case("niagara3", "-Av9d") .Case("niagara4", "-Av9d") - .Default("-Av9"); + .Default(DefV9CPU); } else { return llvm::StringSwitch(Name) .Case("v8", "-Av8") diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0e90a1bf89f16..b06f75908ff66 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ 
-4368,8 +4368,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.getLastArg(options::OPT_save_temps_EQ)) Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ); - if (Args.hasFlag(options::OPT_fmemprof, options::OPT_fno_memprof, false)) - Args.AddLastArg(CmdArgs, options::OPT_fmemprof); + if (Args.hasFlag(options::OPT_fmemory_profile, + options::OPT_fno_memory_profile, false)) + Args.AddLastArg(CmdArgs, options::OPT_fmemory_profile); // Embed-bitcode option. // Only white-listed flags below are allowed to be embedded. @@ -5359,6 +5360,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden); + Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var, + options::OPT_fno_visibility_inlines_hidden_static_local_var); Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden); Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 04349ff6af984..9d22cda217116 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2408,6 +2408,13 @@ void Darwin::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, // Enable compatibility mode for NSItemProviderCompletionHandler in // Foundation/NSItemProvider.h. CC1Args.push_back("-fcompatibility-qualified-id-block-type-checking"); + + // Give static local variables in inline functions hidden visibility when + // -fvisibility-inlines-hidden is enabled. 
+ if (!DriverArgs.getLastArgNoClaim( + options::OPT_fvisibility_inlines_hidden_static_local_var, + options::OPT_fno_visibility_inlines_hidden_static_local_var)) + CC1Args.push_back("-fvisibility-inlines-hidden-static-local-var"); } DerivedArgList * diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index fe11cba9bfdf0..5dda2bda06b54 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -475,6 +475,7 @@ template <> struct MappingTraits { Style.AlwaysBreakBeforeMultilineStrings); IO.mapOptional("AlwaysBreakTemplateDeclarations", Style.AlwaysBreakTemplateDeclarations); + IO.mapOptional("AttributeMacros", Style.AttributeMacros); IO.mapOptional("BinPackArguments", Style.BinPackArguments); IO.mapOptional("BinPackParameters", Style.BinPackParameters); IO.mapOptional("BraceWrapping", Style.BraceWrapping); @@ -842,6 +843,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; LLVMStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; + LLVMStyle.AttributeMacros.push_back("__capability"); LLVMStyle.BinPackArguments = true; LLVMStyle.BinPackParameters = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 4bc865b043fd2..8e4994f4c0d57 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -62,6 +62,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_char32_t: case tok::kw_typeof: case tok::kw_decltype: + case tok::kw__Atomic: return true; default: return false; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index a54600a478a46..76ef99e72d58e 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -29,6 +29,7 @@ namespace format { TYPE(ArrayInitializerLSquare) \ 
TYPE(ArraySubscriptLSquare) \ TYPE(AttributeColon) \ + TYPE(AttributeMacro) \ TYPE(AttributeParen) \ TYPE(AttributeSquare) \ TYPE(BinaryOperator) \ @@ -100,6 +101,7 @@ namespace format { TYPE(TrailingAnnotation) \ TYPE(TrailingReturnArrow) \ TYPE(TrailingUnaryOperator) \ + TYPE(TypeDeclarationParen) \ TYPE(TypenameMacro) \ TYPE(UnaryOperator) \ TYPE(UntouchableMacroFunc) \ @@ -442,7 +444,8 @@ struct FormatToken { bool canBePointerOrReferenceQualifier() const { return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile, tok::kw___attribute, tok::kw__Nonnull, tok::kw__Nullable, - tok::kw__Null_unspecified); + tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64, + TT_AttributeMacro); } /// Determine whether the token is a simple-type-specifier. @@ -523,7 +526,9 @@ struct FormatToken { case tok::kw_decltype: case tok::kw_noexcept: case tok::kw_static_assert: + case tok::kw__Atomic: case tok::kw___attribute: + case tok::kw___underlying_type: return true; default: return false; diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 1fd153d1112eb..f6db58acd8dbe 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -39,6 +39,8 @@ FormatTokenLexer::FormatTokenLexer( for (const std::string &ForEachMacro : Style.ForEachMacros) Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro}); + for (const std::string &AttributeMacro : Style.AttributeMacros) + Macros.insert({&IdentTable.get(AttributeMacro), TT_AttributeMacro}); for (const std::string &StatementMacro : Style.StatementMacros) Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro}); for (const std::string &TypenameMacro : Style.TypenameMacros) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a9077500e041f..5dd6a7a9da40b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -185,6 +185,8 @@ class AnnotatingParser { if 
(!CurrentToken) return false; FormatToken *Left = CurrentToken->Previous; + FormatToken *PrevNonComment = + Left ? Left->getPreviousNonComment() : nullptr; Left->ParentBracket = Contexts.back().ContextKind; ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); @@ -216,9 +218,8 @@ class AnnotatingParser { // export type X = (...); Contexts.back().IsExpression = false; } else if (Left->Previous && - (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, - tok::kw_while, tok::l_paren, - tok::comma) || + (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_while, + tok::l_paren, tok::comma) || Left->Previous->isIf() || Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. @@ -242,8 +243,16 @@ class AnnotatingParser { } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; - } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { + } else if (PrevNonComment && PrevNonComment->is(tok::kw___attribute)) { Left->setType(TT_AttributeParen); + } else if (PrevNonComment && + PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype, + tok::kw_typeof, tok::kw__Atomic, + tok::kw___underlying_type)) { + Left->setType(TT_TypeDeclarationParen); + // decltype() and typeof() usually contain expressions. + if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof)) + Contexts.back().IsExpression = true; } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. 
Contexts.back().IsForEachMacro = true; @@ -335,6 +344,8 @@ class AnnotatingParser { if (Left->is(TT_AttributeParen)) CurrentToken->setType(TT_AttributeParen); + if (Left->is(TT_TypeDeclarationParen)) + CurrentToken->setType(TT_TypeDeclarationParen); if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) CurrentToken->setType(TT_JavaAnnotation); if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) @@ -940,9 +951,9 @@ class AnnotatingParser { return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && - (!Tok->Previous || - !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute, - TT_LeadingJavaAnnotation))) + !Tok->is(TT_TypeDeclarationParen) && + (!Tok->Previous || !Tok->Previous->isOneOf(tok::kw___attribute, + TT_LeadingJavaAnnotation))) Line.MightBeFunctionDecl = true; break; case tok::l_square: @@ -1333,11 +1344,12 @@ class AnnotatingParser { // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). 
if (!CurrentToken->isOneOf( - TT_LambdaLSquare, TT_LambdaLBrace, TT_ForEachMacro, - TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, - TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro, - TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString, - TT_ObjCStringLiteral, TT_UntouchableMacroFunc)) + TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, + TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace, + TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, + TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator, + TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral, + TT_UntouchableMacroFunc)) CurrentToken->setType(TT_Unknown); CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -1753,9 +1765,8 @@ class AnnotatingParser { PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); - if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && - PreviousNotConst->MatchingParen->Previous && - PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) + if (PreviousNotConst->is(tok::r_paren) && + PreviousNotConst->is(TT_TypeDeclarationParen)) return true; return (!IsPPKeyword && @@ -1840,6 +1851,12 @@ class AnnotatingParser { T = T->MatchingParen->Previous->Previous; continue; } + } else if (T->is(TT_AttributeSquare)) { + // Handle `x = (foo *[[clang::foo]])&v;`: + if (T->MatchingParen && T->MatchingParen->Previous) { + T = T->MatchingParen->Previous; + continue; + } } else if (T->canBePointerOrReferenceQualifier()) { T = T->Previous; continue; @@ -1848,9 +1865,11 @@ class AnnotatingParser { } return T && T->is(TT_PointerOrReference); }; - bool ParensAreType = !Tok.Previous || Tok.Previous->is(TT_TemplateCloser) || - Tok.Previous->isSimpleTypeSpecifier() || - IsQualifiedPointerOrReference(Tok.Previous); + bool ParensAreType = + !Tok.Previous || + Tok.Previous->isOneOf(TT_TemplateCloser, 
TT_TypeDeclarationParen) || + Tok.Previous->isSimpleTypeSpecifier() || + IsQualifiedPointerOrReference(Tok.Previous); bool ParensCouldEndDecl = Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); if (ParensAreType && !ParensCouldEndDecl) @@ -1918,6 +1937,9 @@ class AnnotatingParser { if (PrevToken->is(tok::coloncolon)) return TT_PointerOrReference; + if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen)) + return TT_PointerOrReference; + if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, tok::equal, tok::kw_delete, tok::kw_sizeof, @@ -1933,15 +1955,6 @@ class AnnotatingParser { if (NextToken->isOneOf(tok::comma, tok::semi)) return TT_PointerOrReference; - if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) { - FormatToken *TokenBeforeMatchingParen = - PrevToken->MatchingParen->getPreviousNonComment(); - if (TokenBeforeMatchingParen && - TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype, - TT_TypenameMacro)) - return TT_PointerOrReference; - } - if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, tok::kw_false, tok::r_brace) || @@ -2387,6 +2400,8 @@ static bool isFunctionDeclarationName(const FormatToken &Current, return true; for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { + if (Tok->is(TT_TypeDeclarationParen)) + return true; if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) { Tok = Tok->MatchingParen; continue; @@ -2835,9 +2850,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return true; FormatToken *TokenBeforeMatchingParen = Left.MatchingParen->getPreviousNonComment(); - if (!TokenBeforeMatchingParen || - !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype, - TT_TypenameMacro)) + if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen)) return true; } return 
(Left.Tok.isLiteral() || @@ -3935,7 +3948,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.is(tok::equal) && Right.is(tok::l_brace) && !Style.Cpp11BracedListStyle) return false; - if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) + if (Left.is(tok::l_paren) && + Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) return false; if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 392ba0f30f105..ebcb51a567aa9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1041,7 +1041,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.ThinLinkBitcodeFile = std::string(Args.getLastArgValue(OPT_fthin_link_bitcode_EQ)); - Opts.HeapProf = Args.hasArg(OPT_fmemprof); + Opts.HeapProf = Args.hasArg(OPT_fmemory_profile); Opts.MSVolatile = Args.hasArg(OPT_fms_volatile); @@ -2793,6 +2793,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, if (Args.hasArg(OPT_fvisibility_inlines_hidden)) Opts.InlineVisibilityHidden = 1; + if (Args.hasArg(OPT_fvisibility_inlines_hidden_static_local_var)) + Opts.VisibilityInlinesHiddenStaticLocalVar = 1; + if (Args.hasArg(OPT_fvisibility_global_new_delete_hidden)) Opts.GlobalAllocationFunctionVisibilityHidden = 1; diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 927f25751664a..22744adefbefd 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -1766,36 +1766,12 @@ vec_cmpne(vector unsigned int __a, vector unsigned int __b) { (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector bool long long __a, vector bool long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static 
__inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector signed long long __a, vector signed long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector double __a, vector double __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - /* vec_cmpnez */ static __inline__ vector bool char __ATTRS_o_ai @@ -1900,6 +1876,86 @@ vec_parity_lsbb(vector signed long long __a) { return __builtin_altivec_vprtybd(__a); } +#else +/* vec_cmpne */ + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector bool char __a, vector bool char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector signed char __a, vector signed char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector unsigned char __a, vector unsigned char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector bool short __a, vector bool short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector signed short __a, vector signed short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector unsigned short __a, vector unsigned short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static 
__inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector bool int __a, vector bool int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector signed int __a, vector signed int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector unsigned int __a, vector unsigned int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector float __a, vector float __b) { + return ~(vec_cmpeq(__a, __b)); +} +#endif + +#ifdef __POWER8_VECTOR__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector bool long long __a, vector bool long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector signed long long __a, vector signed long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} +#endif + +#ifdef __VSX__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector double __a, vector double __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} #endif /* vec_cmpgt */ @@ -2702,67 +2758,67 @@ vec_insert_exp(vector unsigned int __a, vector unsigned int __b) { } #if defined(__powerpc64__) -static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(signed char *__a, +static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(const signed char *__a, size_t __b) { return (vector signed char)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char 
__ATTRS_o_ai -vec_xl_len(unsigned char *__a, size_t __b) { +vec_xl_len(const unsigned char *__a, size_t __b) { return (vector unsigned char)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(signed short *__a, +static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(const signed short *__a, size_t __b) { return (vector signed short)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_len(unsigned short *__a, size_t __b) { +vec_xl_len(const unsigned short *__a, size_t __b) { return (vector unsigned short)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(signed int *__a, +static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(const signed int *__a, size_t __b) { return (vector signed int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(unsigned int *__a, +static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(const unsigned int *__a, size_t __b) { return (vector unsigned int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector float __ATTRS_o_ai vec_xl_len(float *__a, size_t __b) { +static __inline__ vector float __ATTRS_o_ai vec_xl_len(const float *__a, size_t __b) { return (vector float)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_len(signed __int128 *__a, size_t __b) { +vec_xl_len(const signed __int128 *__a, size_t __b) { return (vector signed __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_len(unsigned __int128 *__a, size_t __b) { +vec_xl_len(const unsigned __int128 *__a, size_t __b) { return (vector unsigned __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_len(signed long long *__a, size_t __b) { +vec_xl_len(const signed long long *__a, size_t __b) 
{ return (vector signed long long)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_len(unsigned long long *__a, size_t __b) { +vec_xl_len(const unsigned long long *__a, size_t __b) { return (vector unsigned long long)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a, +static __inline__ vector double __ATTRS_o_ai vec_xl_len(const double *__a, size_t __b) { return (vector double)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_len_r(unsigned char *__a, size_t __b) { +vec_xl_len_r(const unsigned char *__a, size_t __b) { vector unsigned char __res = (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56)); #ifdef __LITTLE_ENDIAN__ @@ -5487,6 +5543,16 @@ vec_msum(vector unsigned short __a, vector unsigned short __b, return __builtin_altivec_vmsumuhm(__a, __b, __c); } +/* vec_msumc */ + +#ifdef __POWER10_VECTOR__ +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_msumc(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned __int128 __c) { + return __builtin_altivec_vmsumcud(__a, __b, __c); +} +#endif + /* vec_vmsummbm */ static __inline__ vector int __attribute__((__always_inline__)) @@ -5713,6 +5779,26 @@ vec_mule(vector unsigned int __a, vector unsigned int __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mule(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmulosd(__a, __b); +#else + return __builtin_altivec_vmulesd(__a, __b); +#endif +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mule(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmuloud(__a, __b); +#else + return __builtin_altivec_vmuleud(__a, __b); +#endif +} +#endif + /* vec_vmulesb */ static __inline__ 
vector short __attribute__((__always_inline__)) @@ -5839,6 +5925,26 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mulo(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmulesd(__a, __b); +#else + return __builtin_altivec_vmulosd(__a, __b); +#endif +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mulo(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmuleud(__a, __b); +#else + return __builtin_altivec_vmuloud(__a, __b); +#endif +} +#endif + /* vec_vmulosb */ static __inline__ vector short __attribute__((__always_inline__)) @@ -16397,41 +16503,41 @@ typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); typedef vector float unaligned_vec_float __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset, - signed char *__ptr) { + const signed char *__ptr) { return *(unaligned_vec_schar *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned char -vec_xl(signed long long __offset, unsigned char *__ptr) { +vec_xl(signed long long __offset, const unsigned char *__ptr) { return *(unaligned_vec_uchar*)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, - signed short *__ptr) { + const signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short -vec_xl(signed long long __offset, unsigned short *__ptr) { +vec_xl(signed long long __offset, const unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset, - signed int *__ptr) { + const signed 
int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, - unsigned int *__ptr) { + const unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, - float *__ptr) { + const float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_float *)__addr; } @@ -16442,19 +16548,19 @@ typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); typedef vector double unaligned_vec_double __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed long long -vec_xl(signed long long __offset, signed long long *__ptr) { +vec_xl(signed long long __offset, const signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long long -vec_xl(signed long long __offset, unsigned long long *__ptr) { +vec_xl(signed long long __offset, const unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, - double *__ptr) { + const double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_double *)__addr; } @@ -16465,13 +16571,13 @@ typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 -vec_xl(signed long long __offset, signed __int128 *__ptr) { +vec_xl(signed long long __offset, const signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector 
unsigned __int128 -vec_xl(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl(signed long long __offset, const unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ui128 *)__addr; } @@ -16481,71 +16587,71 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) { #ifdef __LITTLE_ENDIAN__ static __inline__ vector signed char __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed char *__ptr) { +vec_xl_be(signed long long __offset, const signed char *__ptr) { vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned char *__ptr) { +vec_xl_be(signed long long __offset, const unsigned char *__ptr) { vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector signed short __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed short *__ptr) { +vec_xl_be(signed long long __offset, const signed short *__ptr) { vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned short *__ptr) { +vec_xl_be(signed long long __offset, const unsigned short *__ptr) { vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector signed int __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed int *__ptr) { +vec_xl_be(signed long long __offset, const signed int *__ptr) { return (vector signed 
int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector unsigned int __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned int *__ptr) { +vec_xl_be(signed long long __offset, const unsigned int *__ptr) { return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector float __ATTRS_o_ai -vec_xl_be(signed long long __offset, float *__ptr) { +vec_xl_be(signed long long __offset, const float *__ptr) { return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed long long *__ptr) { +vec_xl_be(signed long long __offset, const signed long long *__ptr) { return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned long long *__ptr) { +vec_xl_be(signed long long __offset, const unsigned long long *__ptr) { return (vector unsigned long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector double __ATTRS_o_ai -vec_xl_be(signed long long __offset, double *__ptr) { +vec_xl_be(signed long long __offset, const double *__ptr) { return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr); } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed __int128 *__ptr) { +vec_xl_be(signed long long __offset, const signed __int128 *__ptr) { return vec_xl(__offset, __ptr); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl_be(signed long long __offset, const unsigned __int128 *__ptr) { return vec_xl(__offset, __ptr); } #endif @@ -16558,44 +16664,44 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { /* vect_xl_sext */ static __inline__ vector unsigned __int128 __ATTRS_o_ai 
-vec_xl_sext(signed long long __offset, signed char *__pointer) { +vec_xl_sext(signed long long __offset, const signed char *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed short *__pointer) { +vec_xl_sext(signed long long __offset, const signed short *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed int *__pointer) { +vec_xl_sext(signed long long __offset, const signed int *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed long long *__pointer) { +vec_xl_sext(signed long long __offset, const signed long long *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } /* vec_xl_zext */ static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned char *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned char *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned short *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned short *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned int *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned int *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned long long *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned long long *__pointer) { return (vector unsigned 
__int128)*(__pointer + __offset); } @@ -16935,6 +17041,33 @@ vec_extractm(vector unsigned __int128 __a) { return __builtin_altivec_vextractqm(__a); } +/* vec_expandm */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_expandm(vector unsigned char __a) { + return __builtin_altivec_vexpandbm(__a); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_expandm(vector unsigned short __a) { + return __builtin_altivec_vexpandhm(__a); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_expandm(vector unsigned int __a) { + return __builtin_altivec_vexpandwm(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_expandm(vector unsigned long long __a) { + return __builtin_altivec_vexpanddm(__a); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_expandm(vector unsigned __int128 __a) { + return __builtin_altivec_vexpandqm(__a); +} + /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 6402b31d00b29..572fc7115b879 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -135,6 +135,14 @@ struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler { } }; +/// Handler for "\#pragma STDC FENV_ROUND ...". +struct PragmaSTDC_FENV_ROUNDHandler : public PragmaHandler { + PragmaSTDC_FENV_ROUNDHandler() : PragmaHandler("FENV_ROUND") {} + + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &Tok) override; +}; + /// PragmaSTDC_UnknownHandler - "\#pragma STDC ...". 
struct PragmaSTDC_UnknownHandler : public PragmaHandler { PragmaSTDC_UnknownHandler() = default; @@ -312,8 +320,11 @@ void Parser::initializePragmaHandlers() { FPContractHandler = std::make_unique(); PP.AddPragmaHandler("STDC", FPContractHandler.get()); - STDCFENVHandler = std::make_unique(); - PP.AddPragmaHandler("STDC", STDCFENVHandler.get()); + STDCFenvAccessHandler = std::make_unique(); + PP.AddPragmaHandler("STDC", STDCFenvAccessHandler.get()); + + STDCFenvRoundHandler = std::make_unique(); + PP.AddPragmaHandler("STDC", STDCFenvRoundHandler.get()); STDCCXLIMITHandler = std::make_unique(); PP.AddPragmaHandler("STDC", STDCCXLIMITHandler.get()); @@ -485,8 +496,11 @@ void Parser::resetPragmaHandlers() { PP.RemovePragmaHandler("STDC", FPContractHandler.get()); FPContractHandler.reset(); - PP.RemovePragmaHandler("STDC", STDCFENVHandler.get()); - STDCFENVHandler.reset(); + PP.RemovePragmaHandler("STDC", STDCFenvAccessHandler.get()); + STDCFenvAccessHandler.reset(); + + PP.RemovePragmaHandler("STDC", STDCFenvRoundHandler.get()); + STDCFenvRoundHandler.reset(); PP.RemovePragmaHandler("STDC", STDCCXLIMITHandler.get()); STDCCXLIMITHandler.reset(); @@ -697,6 +711,14 @@ void Parser::HandlePragmaFEnvAccess() { Actions.ActOnPragmaFEnvAccess(PragmaLoc, IsEnabled); } +void Parser::HandlePragmaFEnvRound() { + assert(Tok.is(tok::annot_pragma_fenv_round)); + auto RM = static_cast( + reinterpret_cast(Tok.getAnnotationValue())); + + SourceLocation PragmaLoc = ConsumeAnnotationToken(); + Actions.setRoundingMode(PragmaLoc, RM); +} StmtResult Parser::HandlePragmaCaptured() { @@ -2929,6 +2951,56 @@ void PragmaFPHandler::HandlePragma(Preprocessor &PP, /*DisableMacroExpansion=*/false, /*IsReinject=*/false); } +void PragmaSTDC_FENV_ROUNDHandler::HandlePragma(Preprocessor &PP, + PragmaIntroducer Introducer, + Token &Tok) { + Token PragmaName = Tok; + SmallVector TokenList; + + PP.Lex(Tok); + if (Tok.isNot(tok::identifier)) { + PP.Diag(Tok.getLocation(), 
diag::warn_pragma_expected_identifier) + << PragmaName.getIdentifierInfo()->getName(); + return; + } + IdentifierInfo *II = Tok.getIdentifierInfo(); + + auto RM = + llvm::StringSwitch(II->getName()) + .Case("FE_TOWARDZERO", llvm::RoundingMode::TowardZero) + .Case("FE_TONEAREST", llvm::RoundingMode::NearestTiesToEven) + .Case("FE_UPWARD", llvm::RoundingMode::TowardPositive) + .Case("FE_DOWNWARD", llvm::RoundingMode::TowardNegative) + .Case("FE_TONEARESTFROMZERO", llvm::RoundingMode::NearestTiesToAway) + .Case("FE_DYNAMIC", llvm::RoundingMode::Dynamic) + .Default(llvm::RoundingMode::Invalid); + if (RM == llvm::RoundingMode::Invalid) { + PP.Diag(Tok.getLocation(), diag::warn_stdc_unknown_rounding_mode); + return; + } + PP.Lex(Tok); + + if (Tok.isNot(tok::eod)) { + PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) + << "STDC FENV_ROUND"; + return; + } + + // Until the pragma is fully implemented, issue a warning. + PP.Diag(Tok.getLocation(), diag::warn_stdc_fenv_round_not_supported); + + MutableArrayRef Toks(PP.getPreprocessorAllocator().Allocate(1), + 1); + Toks[0].startToken(); + Toks[0].setKind(tok::annot_pragma_fenv_round); + Toks[0].setLocation(Tok.getLocation()); + Toks[0].setAnnotationEndLoc(Tok.getLocation()); + Toks[0].setAnnotationValue( + reinterpret_cast(static_cast(RM))); + PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true, + /*IsReinject=*/false); +} + void Parser::HandlePragmaFP() { assert(Tok.is(tok::annot_pragma_fp)); auto *AnnotValue = diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index 48a583b8c67db..dc946bd94bbba 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -370,6 +370,12 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( HandlePragmaFEnvAccess(); return StmtEmpty(); + case tok::annot_pragma_fenv_round: + ProhibitAttributes(Attrs); + Diag(Tok, diag::err_pragma_file_or_compound_scope) << "STDC FENV_ROUND"; + ConsumeAnnotationToken(); + return 
StmtError(); + case tok::annot_pragma_float_control: ProhibitAttributes(Attrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "float_control"; @@ -944,6 +950,9 @@ void Parser::ParseCompoundStatementLeadingPragmas() { case tok::annot_pragma_fenv_access: HandlePragmaFEnvAccess(); break; + case tok::annot_pragma_fenv_round: + HandlePragmaFEnvRound(); + break; case tok::annot_pragma_float_control: HandlePragmaFloatControl(); break; diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index c72ffde8fc263..109f24425777d 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -783,6 +783,9 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, case tok::annot_pragma_fenv_access: HandlePragmaFEnvAccess(); return nullptr; + case tok::annot_pragma_fenv_round: + HandlePragmaFEnvRound(); + return nullptr; case tok::annot_pragma_float_control: HandlePragmaFloatControl(); return nullptr; diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index e34f7371506dd..bd5fc586b6af7 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -979,6 +979,11 @@ void Sema::ActOnPragmaFPReassociate(SourceLocation Loc, bool IsEnabled) { } void Sema::setRoundingMode(SourceLocation Loc, llvm::RoundingMode FPR) { + // C2x: 7.6.2p3 If the FE_DYNAMIC mode is specified and FENV_ACCESS is "off", + // the translator may assume that the default rounding mode is in effect. 
+ if (FPR == llvm::RoundingMode::Dynamic && !CurFPFeatures.getAllowFEnvAccess()) + FPR = llvm::RoundingMode::NearestTiesToEven; + FPOptionsOverride NewFPFeatures = CurFPFeatureOverrides(); NewFPFeatures.setRoundingModeOverride(FPR); FpPragmaStack.Act(Loc, PSK_Set, StringRef(), NewFPFeatures); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index db3a80ae876ac..75200cdfd64ef 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4615,8 +4615,8 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, << SourceRange(base->getBeginLoc(), rbLoc); return ExprError(); } - // If the base is either a MatrixSubscriptExpr or a matrix type, try to create - // a new MatrixSubscriptExpr. + // If the base is a MatrixSubscriptExpr, try to create a new + // MatrixSubscriptExpr. auto *matSubscriptE = dyn_cast(base); if (matSubscriptE) { if (CheckAndReportCommaError(idx)) @@ -4627,34 +4627,13 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, return CreateBuiltinMatrixSubscriptExpr( matSubscriptE->getBase(), matSubscriptE->getRowIdx(), idx, rbLoc); } - Expr *matrixBase = base; - bool IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base); - if (!IsMSPropertySubscript) { - ExprResult result = CheckPlaceholderExpr(base); - if (!result.isInvalid()) - matrixBase = result.get(); - } - if (matrixBase->getType()->isMatrixType()) { - if (CheckAndReportCommaError(idx)) - return ExprError(); - - return CreateBuiltinMatrixSubscriptExpr(matrixBase, idx, nullptr, rbLoc); - } - - // A comma-expression as the index is deprecated in C++2a onwards. - if (getLangOpts().CPlusPlus20 && - ((isa(idx) && cast(idx)->isCommaOp()) || - (isa(idx) && - cast(idx)->getOperator() == OO_Comma))) { - Diag(idx->getExprLoc(), diag::warn_deprecated_comma_subscript) - << SourceRange(base->getBeginLoc(), rbLoc); - } // Handle any non-overload placeholder types in the base and index // expressions. 
We can't handle overloads here because the other // operand might be an overloadable type, in which case the overload // resolution for the operator overload should get the first crack // at the overload. + bool IsMSPropertySubscript = false; if (base->getType()->isNonOverloadPlaceholderType()) { IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base); if (!IsMSPropertySubscript) { @@ -4664,6 +4643,24 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, base = result.get(); } } + + // If the base is a matrix type, try to create a new MatrixSubscriptExpr. + if (base->getType()->isMatrixType()) { + if (CheckAndReportCommaError(idx)) + return ExprError(); + + return CreateBuiltinMatrixSubscriptExpr(base, idx, nullptr, rbLoc); + } + + // A comma-expression as the index is deprecated in C++2a onwards. + if (getLangOpts().CPlusPlus20 && + ((isa(idx) && cast(idx)->isCommaOp()) || + (isa(idx) && + cast(idx)->getOperator() == OO_Comma))) { + Diag(idx->getExprLoc(), diag::warn_deprecated_comma_subscript) + << SourceRange(base->getBeginLoc(), rbLoc); + } + if (idx->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(idx); if (result.isInvalid()) return ExprError(); @@ -8400,7 +8397,7 @@ static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. E = E->IgnoreImpCasts(); - E = E->IgnoreConversionOperator(); + E = E->IgnoreConversionOperatorSingleStep(); E = E->IgnoreImpCasts(); if (auto *MTE = dyn_cast(E)) { E = MTE->getSubExpr(); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 21a9ad04d5008..71341e5688fe0 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1494,17 +1494,9 @@ Sema::TryImplicitConversion(Expr *From, QualType ToType, /// converted expression. Flavor is the kind of conversion we're /// performing, used in the error message. 
If @p AllowExplicit, /// explicit user-defined conversions are permitted. -ExprResult -Sema::PerformImplicitConversion(Expr *From, QualType ToType, - AssignmentAction Action, bool AllowExplicit) { - ImplicitConversionSequence ICS; - return PerformImplicitConversion(From, ToType, Action, AllowExplicit, ICS); -} - -ExprResult -Sema::PerformImplicitConversion(Expr *From, QualType ToType, - AssignmentAction Action, bool AllowExplicit, - ImplicitConversionSequence& ICS) { +ExprResult Sema::PerformImplicitConversion(Expr *From, QualType ToType, + AssignmentAction Action, + bool AllowExplicit) { if (checkPlaceholderForOverload(*this, From)) return ExprError(); @@ -1515,13 +1507,13 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, if (getLangOpts().ObjC) CheckObjCBridgeRelatedConversions(From->getBeginLoc(), ToType, From->getType(), From); - ICS = ::TryImplicitConversion(*this, From, ToType, - /*SuppressUserConversions=*/false, - AllowExplicit ? AllowedExplicit::All - : AllowedExplicit::None, - /*InOverloadResolution=*/false, - /*CStyle=*/false, AllowObjCWritebackConversion, - /*AllowObjCConversionOnExplicit=*/false); + ImplicitConversionSequence ICS = ::TryImplicitConversion( + *this, From, ToType, + /*SuppressUserConversions=*/false, + AllowExplicit ? 
AllowedExplicit::All : AllowedExplicit::None, + /*InOverloadResolution=*/false, + /*CStyle=*/false, AllowObjCWritebackConversion, + /*AllowObjCConversionOnExplicit=*/false); return PerformImplicitConversion(From, ToType, ICS, Action); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9d95fa9b447ee..88073dfc39298 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3722,7 +3722,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } case LATE_PARSED_TEMPLATE: - LateParsedTemplates.append(Record.begin(), Record.end()); + LateParsedTemplates.emplace_back( + std::piecewise_construct, std::forward_as_tuple(&F), + std::forward_as_tuple(Record.begin(), Record.end())); break; case OPTIMIZE_PRAGMA_OPTIONS: @@ -8396,25 +8398,28 @@ void ASTReader::ReadPendingInstantiations( void ASTReader::ReadLateParsedTemplates( llvm::MapVector> &LPTMap) { - for (unsigned Idx = 0, N = LateParsedTemplates.size(); Idx < N; - /* In loop */) { - FunctionDecl *FD = cast(GetDecl(LateParsedTemplates[Idx++])); + for (auto &LPT : LateParsedTemplates) { + ModuleFile *FMod = LPT.first; + RecordDataImpl &LateParsed = LPT.second; + for (unsigned Idx = 0, N = LateParsed.size(); Idx < N; + /* In loop */) { + FunctionDecl *FD = + cast(GetLocalDecl(*FMod, LateParsed[Idx++])); - auto LT = std::make_unique(); - LT->D = GetDecl(LateParsedTemplates[Idx++]); + auto LT = std::make_unique(); + LT->D = GetLocalDecl(*FMod, LateParsed[Idx++]); - ModuleFile *F = getOwningModuleFile(LT->D); - assert(F && "No module"); + ModuleFile *F = getOwningModuleFile(LT->D); + assert(F && "No module"); - unsigned TokN = LateParsedTemplates[Idx++]; - LT->Toks.reserve(TokN); - for (unsigned T = 0; T < TokN; ++T) - LT->Toks.push_back(ReadToken(*F, LateParsedTemplates, Idx)); + unsigned TokN = LateParsed[Idx++]; + LT->Toks.reserve(TokN); + for (unsigned T = 0; T < TokN; ++T) + LT->Toks.push_back(ReadToken(*F, 
LateParsed, Idx)); - LPTMap.insert(std::make_pair(FD, std::move(LT))); + LPTMap.insert(std::make_pair(FD, std::move(LT))); + } } - - LateParsedTemplates.clear(); } void ASTReader::LoadSelector(Selector Sel) { diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 47b378f5727b4..f5a66dc3c2d10 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -585,7 +585,7 @@ void ASTDeclReader::VisitDecl(Decl *D) { Reader.getContext()); } D->setLocation(ThisDeclLoc); - D->setInvalidDecl(Record.readInt()); + D->InvalidDecl = Record.readInt(); if (Record.readInt()) { // hasAttrs AttrVec Attrs; Record.readAttributes(Attrs); diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index a42ed2f3c179d..542e75e77c3a5 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -132,15 +132,38 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, return Missing; } + // The ModuleManager's use of FileEntry nodes as the keys for its map of + // loaded modules is less than ideal. Uniqueness for FileEntry nodes is + // maintained by FileManager, which in turn uses inode numbers on hosts + // that support that. When coupled with the module cache's proclivity for + // turning over and deleting stale PCMs, this means entries for different + // module files can wind up reusing the same underlying inode. When this + // happens, subsequent accesses to the Modules map will disagree on the + // ModuleFile associated with a given file. In general, it is not sufficient + // to resolve this conundrum with a type like FileEntryRef that stores the + // name of the FileEntry node on first access because of path canonicalization + // issues. However, the paths constructed for implicit module builds are + // fully under Clang's control. 
We *can*, therefore, rely on their structure + // being consistent across operating systems and across subsequent accesses + // to the Modules map. + auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF, + const FileEntry *Entry) -> bool { + if (Kind != MK_ImplicitModule) + return true; + return Entry->getName() == MF->FileName; + }; + // Check whether we already loaded this module, before if (ModuleFile *ModuleEntry = Modules.lookup(Entry)) { - // Check the stored signature. - if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr)) - return OutOfDate; - - Module = ModuleEntry; - updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc); - return AlreadyLoaded; + if (implicitModuleNamesMatch(Type, ModuleEntry, Entry)) { + // Check the stored signature. + if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr)) + return OutOfDate; + + Module = ModuleEntry; + updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc); + return AlreadyLoaded; + } } // Allocate a new module. diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index c65d58e49d785..b71c19a80da90 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -249,15 +249,21 @@ class StdLibraryFunctionsChecker } }; - // Represents a buffer argument with an additional size argument. - // E.g. the first two arguments here: + // Represents a buffer argument with an additional size constraint. The + // constraint may be a concrete value, or a symbolic value in an argument. + // Example 1. Concrete value as the minimum buffer size. + // char *asctime_r(const struct tm *restrict tm, char *restrict buf); + // // `buf` size must be at least 26 bytes according the POSIX standard. + // Example 2. Argument as a buffer size. 
// ctime_s(char *buffer, rsize_t bufsz, const time_t *time); - // Another example: + // Example 3. The size is computed as a multiplication of other args. // size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); // // Here, ptr is the buffer, and its minimum size is `size * nmemb`. class BufferSizeConstraint : public ValueConstraint { + // The concrete value which is the minimum size for the buffer. + llvm::Optional ConcreteSize; // The argument which holds the size of the buffer. - ArgNo SizeArgN; + llvm::Optional SizeArgN; // The argument which is a multiplier to size. This is set in case of // `fread` like functions where the size is computed as a multiplication of // two arguments. @@ -266,9 +272,10 @@ class StdLibraryFunctionsChecker BinaryOperator::Opcode Op = BO_LE; public: + BufferSizeConstraint(ArgNo Buffer, llvm::APSInt BufMinSize) + : ValueConstraint(Buffer), ConcreteSize(BufMinSize) {} BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize) : ValueConstraint(Buffer), SizeArgN(BufSize) {} - BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize, ArgNo BufSizeMultiplier) : ValueConstraint(Buffer), SizeArgN(BufSize), SizeMultiplierArgN(BufSizeMultiplier) {} @@ -279,14 +286,27 @@ class StdLibraryFunctionsChecker SValBuilder &SvalBuilder = C.getSValBuilder(); // The buffer argument. SVal BufV = getArgSVal(Call, getArgNo()); - // The size argument. - SVal SizeV = getArgSVal(Call, SizeArgN); - // Multiply with another argument if given. - if (SizeMultiplierArgN) { - SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); - SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, - Summary.getArgType(SizeArgN)); - } + + // Get the size constraint. + const SVal SizeV = [this, &State, &Call, &Summary, &SvalBuilder]() { + if (ConcreteSize) { + return SVal(SvalBuilder.makeIntVal(*ConcreteSize)); + } else if (SizeArgN) { + // The size argument. + SVal SizeV = getArgSVal(Call, *SizeArgN); + // Multiply with another argument if given. 
+ if (SizeMultiplierArgN) { + SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); + SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, + Summary.getArgType(*SizeArgN)); + } + return SizeV; + } else { + llvm_unreachable("The constraint must be either a concrete value or " + "encoded in an arguement."); + } + }(); + // The dynamic size of the buffer argument, got from the analyzer engine. SVal BufDynSize = getDynamicSizeWithOffset(State, BufV); @@ -744,21 +764,38 @@ bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, bool StdLibraryFunctionsChecker::Signature::matches( const FunctionDecl *FD) const { assert(!isInvalid()); - // Check number of arguments: + // Check the number of arguments. if (FD->param_size() != ArgTys.size()) return false; - // Check return type. - if (!isIrrelevant(RetTy)) - if (RetTy != FD->getReturnType().getCanonicalType()) + // The "restrict" keyword is illegal in C++, however, many libc + // implementations use the "__restrict" compiler intrinsic in functions + // prototypes. The "__restrict" keyword qualifies a type as a restricted type + // even in C++. + // In case of any non-C99 languages, we don't want to match based on the + // restrict qualifier because we cannot know if the given libc implementation + // qualifies the paramter type or not. + auto RemoveRestrict = [&FD](QualType T) { + if (!FD->getASTContext().getLangOpts().C99) + T.removeLocalRestrict(); + return T; + }; + + // Check the return type. + if (!isIrrelevant(RetTy)) { + QualType FDRetTy = RemoveRestrict(FD->getReturnType().getCanonicalType()); + if (RetTy != FDRetTy) return false; + } - // Check argument types. + // Check the argument types. 
for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { QualType ArgTy = ArgTys[I]; if (isIrrelevant(ArgTy)) continue; - if (ArgTy != FD->getParamDecl(I)->getType().getCanonicalType()) + QualType FDArgTy = + RemoveRestrict(FD->getParamDecl(I)->getType().getCanonicalType()); + if (ArgTy != FDArgTy) return false; } @@ -914,6 +951,8 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( const QualType ConstWchar_tPtrTy = getPointerTy(getConstTy(WCharTy)); // const wchar_t * const QualType ConstVoidPtrRestrictTy = getRestrictTy(ConstVoidPtrTy); + const QualType SizePtrTy = getPointerTy(SizeTy); + const QualType SizePtrRestrictTy = getRestrictTy(SizePtrTy); const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); const RangeInt UnsignedIntMax = @@ -989,6 +1028,12 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( for (const Summary &S : Summaries) operator()(Name, S); } + // Add the same summary for different names with the Signature explicitly + // given. + void operator()(std::vector Names, Signature Sign, Summary Sum) { + for (StringRef Name : Names) + operator()(Name, Sign, Sum); + } } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries); // Below are helpers functions to create the summaries. 
@@ -2013,6 +2058,225 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( BufferSize(/*Buffer=*/ArgNo(4), /*BufSize=*/ArgNo(5))) .ArgConstraint( ArgumentCondition(5, WithinRange, Range(0, Socklen_tMax)))); + + Optional StructUtimbufTy = lookupTy("utimbuf"); + Optional StructUtimbufPtrTy = getPointerTy(StructUtimbufTy); + + // int utime(const char *filename, struct utimbuf *buf); + addToFunctionSummaryMap( + "utime", Summary(ArgTypes{ConstCharPtrTy, StructUtimbufPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + Optional StructTimespecTy = lookupTy("timespec"); + Optional StructTimespecPtrTy = getPointerTy(StructTimespecTy); + Optional ConstStructTimespecPtrTy = + getPointerTy(getConstTy(StructTimespecTy)); + + // int futimens(int fd, const struct timespec times[2]); + addToFunctionSummaryMap( + "futimens", Summary(ArgTypes{IntTy, ConstStructTimespecPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(ArgumentCondition(0, WithinRange, + Range(0, IntMax)))); + + // int utimensat(int dirfd, const char *pathname, + // const struct timespec times[2], int flags); + addToFunctionSummaryMap("utimensat", + Summary(ArgTypes{IntTy, ConstCharPtrTy, + ConstStructTimespecPtrTy, IntTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(1)))); + + Optional StructTimevalTy = lookupTy("timeval"); + Optional ConstStructTimevalPtrTy = + getPointerTy(getConstTy(StructTimevalTy)); + + // int utimes(const char *filename, const struct timeval times[2]); + addToFunctionSummaryMap( + "utimes", Summary(ArgTypes{ConstCharPtrTy, ConstStructTimevalPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + // int nanosleep(const struct timespec *rqtp, struct timespec *rmtp); + addToFunctionSummaryMap( + "nanosleep", + Summary(ArgTypes{ConstStructTimespecPtrTy, StructTimespecPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + Optional Time_tTy = lookupTy("time_t"); + Optional ConstTime_tPtrTy = 
getPointerTy(getConstTy(Time_tTy)); + Optional ConstTime_tPtrRestrictTy = + getRestrictTy(ConstTime_tPtrTy); + + Optional StructTmTy = lookupTy("tm"); + Optional StructTmPtrTy = getPointerTy(StructTmTy); + Optional StructTmPtrRestrictTy = getRestrictTy(StructTmPtrTy); + Optional ConstStructTmPtrTy = + getPointerTy(getConstTy(StructTmTy)); + Optional ConstStructTmPtrRestrictTy = + getRestrictTy(ConstStructTmPtrTy); + + // struct tm * localtime(const time_t *tp); + addToFunctionSummaryMap( + "localtime", + Summary(ArgTypes{ConstTime_tPtrTy}, RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + // struct tm *localtime_r(const time_t *restrict timer, + // struct tm *restrict result); + addToFunctionSummaryMap( + "localtime_r", + Summary(ArgTypes{ConstTime_tPtrRestrictTy, StructTmPtrRestrictTy}, + RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1)))); + + // char *asctime_r(const struct tm *restrict tm, char *restrict buf); + addToFunctionSummaryMap( + "asctime_r", + Summary(ArgTypes{ConstStructTmPtrRestrictTy, CharPtrRestrictTy}, + RetType{CharPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1))) + .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(1), + /*MinBufSize=*/BVF.getValue(26, IntTy)))); + + // char *ctime_r(const time_t *timep, char *buf); + addToFunctionSummaryMap("ctime_r", + Summary(ArgTypes{ConstTime_tPtrTy, CharPtrTy}, + RetType{CharPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1))) + .ArgConstraint(BufferSize( + /*Buffer=*/ArgNo(1), + /*MinBufSize=*/BVF.getValue(26, IntTy)))); + + // struct tm *gmtime_r(const time_t *restrict timer, + // struct tm *restrict result); + addToFunctionSummaryMap( + "gmtime_r", + Summary(ArgTypes{ConstTime_tPtrRestrictTy, StructTmPtrRestrictTy}, + RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1)))); + + // struct tm * 
gmtime(const time_t *tp); + addToFunctionSummaryMap( + "gmtime", + Summary(ArgTypes{ConstTime_tPtrTy}, RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + Optional Clockid_tTy = lookupTy("clockid_t"); + + // int clock_gettime(clockid_t clock_id, struct timespec *tp); + addToFunctionSummaryMap("clock_gettime", + Summary(ArgTypes{Clockid_tTy, StructTimespecPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(1)))); + + Optional StructItimervalTy = lookupTy("itimerval"); + Optional StructItimervalPtrTy = getPointerTy(StructItimervalTy); + + // int getitimer(int which, struct itimerval *curr_value); + addToFunctionSummaryMap("getitimer", + Summary(ArgTypes{IntTy, StructItimervalPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(1)))); + + Optional Pthread_cond_tTy = lookupTy("pthread_cond_t"); + Optional Pthread_cond_tPtrTy = getPointerTy(Pthread_cond_tTy); + Optional Pthread_tTy = lookupTy("pthread_t"); + Optional Pthread_tPtrTy = getPointerTy(Pthread_tTy); + Optional Pthread_tPtrRestrictTy = getRestrictTy(Pthread_tPtrTy); + Optional Pthread_mutex_tTy = lookupTy("pthread_mutex_t"); + Optional Pthread_mutex_tPtrTy = getPointerTy(Pthread_mutex_tTy); + Optional Pthread_mutex_tPtrRestrictTy = + getRestrictTy(Pthread_mutex_tPtrTy); + Optional Pthread_attr_tTy = lookupTy("pthread_attr_t"); + Optional Pthread_attr_tPtrTy = getPointerTy(Pthread_attr_tTy); + Optional ConstPthread_attr_tPtrTy = + getPointerTy(getConstTy(Pthread_attr_tTy)); + Optional ConstPthread_attr_tPtrRestrictTy = + getRestrictTy(ConstPthread_attr_tPtrTy); + Optional Pthread_mutexattr_tTy = lookupTy("pthread_mutexattr_t"); + Optional ConstPthread_mutexattr_tPtrTy = + getPointerTy(getConstTy(Pthread_mutexattr_tTy)); + Optional ConstPthread_mutexattr_tPtrRestrictTy = + getRestrictTy(ConstPthread_mutexattr_tPtrTy); + + QualType PthreadStartRoutineTy = getPointerTy( + ACtx.getFunctionType(/*ResultTy=*/VoidPtrTy, /*Args=*/VoidPtrTy, + 
FunctionProtoType::ExtProtoInfo())); + + // int pthread_cond_signal(pthread_cond_t *cond); + // int pthread_cond_broadcast(pthread_cond_t *cond); + addToFunctionSummaryMap( + {"pthread_cond_signal", "pthread_cond_broadcast"}, + Signature(ArgTypes{Pthread_cond_tPtrTy}, RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + + // int pthread_create(pthread_t *restrict thread, + // const pthread_attr_t *restrict attr, + // void *(*start_routine)(void*), void *restrict arg); + addToFunctionSummaryMap( + "pthread_create", + Signature(ArgTypes{Pthread_tPtrRestrictTy, + ConstPthread_attr_tPtrRestrictTy, + PthreadStartRoutineTy, VoidPtrRestrictTy}, + RetType{IntTy}), + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(2)))); + + // int pthread_attr_destroy(pthread_attr_t *attr); + // int pthread_attr_init(pthread_attr_t *attr); + addToFunctionSummaryMap( + {"pthread_attr_destroy", "pthread_attr_init"}, + Signature(ArgTypes{Pthread_attr_tPtrTy}, RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + + // int pthread_attr_getstacksize(const pthread_attr_t *restrict attr, + // size_t *restrict stacksize); + // int pthread_attr_getguardsize(const pthread_attr_t *restrict attr, + // size_t *restrict guardsize); + addToFunctionSummaryMap( + {"pthread_attr_getstacksize", "pthread_attr_getguardsize"}, + Signature(ArgTypes{ConstPthread_attr_tPtrRestrictTy, SizePtrRestrictTy}, + RetType{IntTy}), + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1)))); + + // int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize); + // int pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize); + addToFunctionSummaryMap( + {"pthread_attr_setstacksize", "pthread_attr_setguardsize"}, + Signature(ArgTypes{Pthread_attr_tPtrTy, SizeTy}, RetType{IntTy}), + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint( + ArgumentCondition(1, WithinRange, 
Range(0, SizeMax)))); + + // int pthread_mutex_init(pthread_mutex_t *restrict mutex, const + // pthread_mutexattr_t *restrict attr); + addToFunctionSummaryMap( + "pthread_mutex_init", + Signature(ArgTypes{Pthread_mutex_tPtrRestrictTy, + ConstPthread_mutexattr_tPtrRestrictTy}, + RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + + // int pthread_mutex_destroy(pthread_mutex_t *mutex); + // int pthread_mutex_lock(pthread_mutex_t *mutex); + // int pthread_mutex_trylock(pthread_mutex_t *mutex); + // int pthread_mutex_unlock(pthread_mutex_t *mutex); + addToFunctionSummaryMap( + {"pthread_mutex_destroy", "pthread_mutex_lock", "pthread_mutex_trylock", + "pthread_mutex_unlock"}, + Signature(ArgTypes{Pthread_mutex_tPtrTy}, RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); } // Functions for testing. @@ -2048,6 +2312,16 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( EvalCallAsPure) .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1), /*BufSizeMultiplier=*/ArgNo(2)))); + addToFunctionSummaryMap( + "__buf_size_arg_constraint_concrete", + Summary(ArgTypes{ConstVoidPtrTy}, RetType{IntTy}, EvalCallAsPure) + .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), + /*BufSize=*/BVF.getValue(10, IntTy)))); + addToFunctionSummaryMap( + {"__test_restrict_param_0", "__test_restrict_param_1", + "__test_restrict_param_2"}, + Signature(ArgTypes{VoidPtrRestrictTy}, RetType{VoidTy}), + Summary(EvalCallAsPure)); } } diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 78d13ddfb773c..a55d9302ca587 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -687,7 +687,7 @@ void CXXInstanceCall::getExtraInvalidatedValues( // base class decl, rather than the class of the instance which needs to be // checked for mutable fields. // TODO: We might as well look at the dynamic type of the object. 
- const Expr *Ex = getCXXThisExpr()->ignoreParenBaseCasts(); + const Expr *Ex = getCXXThisExpr()->IgnoreParenBaseCasts(); QualType T = Ex->getType(); if (T->isPointerType()) // Arrow or implicit-this syntax? T = T->getPointeeType(); diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp index fe33f9cf8b0ca..03921e0ea7de5 100644 --- a/clang/lib/Tooling/Transformer/RewriteRule.cpp +++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp @@ -242,7 +242,7 @@ class ApplyRuleCallback : public MatchFinder::MatchCallback { } // namespace template -static llvm::Expected> +llvm::Expected> rewriteDescendantsImpl(const T &Node, RewriteRule Rule, const MatchResult &Result) { ApplyRuleCallback Callback(std::move(Rule)); @@ -252,10 +252,43 @@ rewriteDescendantsImpl(const T &Node, RewriteRule Rule, return std::move(Callback.Edits); } +llvm::Expected> +transformer::detail::rewriteDescendants(const Decl &Node, RewriteRule Rule, + const MatchResult &Result) { + return rewriteDescendantsImpl(Node, std::move(Rule), Result); +} + +llvm::Expected> +transformer::detail::rewriteDescendants(const Stmt &Node, RewriteRule Rule, + const MatchResult &Result) { + return rewriteDescendantsImpl(Node, std::move(Rule), Result); +} + +llvm::Expected> +transformer::detail::rewriteDescendants(const TypeLoc &Node, RewriteRule Rule, + const MatchResult &Result) { + return rewriteDescendantsImpl(Node, std::move(Rule), Result); +} + +llvm::Expected> +transformer::detail::rewriteDescendants(const DynTypedNode &DNode, + RewriteRule Rule, + const MatchResult &Result) { + if (const auto *Node = DNode.get()) + return rewriteDescendantsImpl(*Node, std::move(Rule), Result); + if (const auto *Node = DNode.get()) + return rewriteDescendantsImpl(*Node, std::move(Rule), Result); + if (const auto *Node = DNode.get()) + return rewriteDescendantsImpl(*Node, std::move(Rule), Result); + + return llvm::make_error( + llvm::errc::invalid_argument, + "type unsupported for 
recursive rewriting, Kind=" + + DNode.getNodeKind().asStringRef()); +} + EditGenerator transformer::rewriteDescendants(std::string NodeId, RewriteRule Rule) { - // FIXME: warn or return error if `Rule` contains any `AddedIncludes`, since - // these will be dropped. return [NodeId = std::move(NodeId), Rule = std::move(Rule)](const MatchResult &Result) -> llvm::Expected> { @@ -265,17 +298,7 @@ EditGenerator transformer::rewriteDescendants(std::string NodeId, if (It == NodesMap.end()) return llvm::make_error(llvm::errc::invalid_argument, "ID not bound: " + NodeId); - if (auto *Node = It->second.get()) - return rewriteDescendantsImpl(*Node, std::move(Rule), Result); - if (auto *Node = It->second.get()) - return rewriteDescendantsImpl(*Node, std::move(Rule), Result); - if (auto *Node = It->second.get()) - return rewriteDescendantsImpl(*Node, std::move(Rule), Result); - - return llvm::make_error( - llvm::errc::invalid_argument, - "type unsupported for recursive rewriting, ID=\"" + NodeId + - "\", Kind=" + It->second.getNodeKind().asStringRef()); + return detail::rewriteDescendants(It->second, std::move(Rule), Result); }; } @@ -345,14 +368,13 @@ transformer::detail::buildMatchers(const RewriteRule &Rule) { // Each anyOf explicitly controls the traversal kind. The anyOf itself is set // to `TK_AsIs` to ensure no nodes are skipped, thereby deferring to the kind // of the branches. Then, each branch is either left as is, if the kind is - // already set, or explicitly set to `TK_IgnoreUnlessSpelledInSource`. We - // choose this setting, because we think it is the one most friendly to - // beginners, who are (largely) the target audience of Transformer. + // already set, or explicitly set to `TK_AsIs`. We choose this setting because + // it is the default interpretation of matchers. 
std::vector Matchers; for (const auto &Bucket : Buckets) { DynTypedMatcher M = DynTypedMatcher::constructVariadic( DynTypedMatcher::VO_AnyOf, Bucket.first, - taggedMatchers("Tag", Bucket.second, TK_IgnoreUnlessSpelledInSource)); + taggedMatchers("Tag", Bucket.second, TK_AsIs)); M.setAllowBind(true); // `tryBind` is guaranteed to succeed, because `AllowBind` was set to true. Matchers.push_back(M.tryBind(RootID)->withTraversalKind(TK_AsIs)); diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp index 796b0a0283828..f3925aebbe752 100644 --- a/clang/test/AST/ast-dump-fpfeatures.cpp +++ b/clang/test/AST/ast-dump-fpfeatures.cpp @@ -34,4 +34,69 @@ float func_03(float x) { // CHECK-NEXT: ParmVarDecl {{.*}} x 'float' // CHECK-NEXT: CompoundStmt // CHECK-NEXT: ReturnStmt -// CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 \ No newline at end of file +// CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 + + + + +#pragma STDC FENV_ROUND FE_DOWNWARD + +float func_10(float x, float y) { + return x + y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_10 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=3 + +float func_11(float x, float y) { + if (x < 0) { + #pragma STDC FENV_ROUND FE_UPWARD + return x + y; + } + return x - y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_11 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=2 +// CHECK: BinaryOperator {{.*}} 'float' '-' RoundingMode=3 + + +#pragma STDC FENV_ROUND FE_DYNAMIC + +float func_12(float x, float y) { + return x + y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_12 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=1 + +#pragma STDC FENV_ROUND FE_TONEAREST + +float func_13(float x, float y) { + return x + y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_13 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=1 + + +template +T func_14(T x, T y) { +#pragma STDC FENV_ROUND 
FE_TOWARDZERO + return x + y; +} + +float func_15(float x, float y) { +#pragma STDC FPENV_ROUND FE_DOWNWARD + return func_14(x, y); +} + +// CHECK-LABEL: FunctionTemplateDecl {{.*}} func_14 +// CHECK: FunctionDecl {{.*}} func_14 'T (T, T)' +// CHECK: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: BinaryOperator {{.*}} '+' RoundingMode=0 +// CHECK: FunctionDecl {{.*}} func_14 'float (float, float)' +// CHECK: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: BinaryOperator {{.*}} 'float' '+' RoundingMode=0 diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index 7c00e78c16acd..bef786a1a59b6 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -6,11 +6,11 @@ // CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List // CHECK-EMPTY: -// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: apiModeling.StdCLibraryFunctions // CHECK-NEXT: apiModeling.TrustNonnull // CHECK-NEXT: apiModeling.llvm.CastValue // CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.DivideZero // CHECK-NEXT: core.DynamicTypePropagation diff --git a/clang/test/Analysis/std-c-library-functions-POSIX.c b/clang/test/Analysis/std-c-library-functions-POSIX.c index 3638ad100240a..c2c98df864899 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX.c +++ b/clang/test/Analysis/std-c-library-functions-POSIX.c @@ -95,6 +95,33 @@ // CHECK: Loaded summary for: ssize_t send(int sockfd, const void *buf, size_t len, int flags) // CHECK: Loaded summary for: int socketpair(int domain, int type, int protocol, int sv[2]) // CHECK: Loaded summary for: int getnameinfo(const struct sockaddr *restrict sa, socklen_t salen, char *restrict node, socklen_t nodelen, char *restrict service, socklen_t servicelen, int flags) +// CHECK: Loaded summary for: int utime(const char *filename, 
struct utimbuf *buf) +// CHECK: Loaded summary for: int futimens(int fd, const struct timespec times[2]) +// CHECK: Loaded summary for: int utimensat(int dirfd, const char *pathname, const struct timespec times[2], int flags) +// CHECK: Loaded summary for: int utimes(const char *filename, const struct timeval times[2]) +// CHECK: Loaded summary for: int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +// CHECK: Loaded summary for: struct tm *localtime(const time_t *tp) +// CHECK: Loaded summary for: struct tm *localtime_r(const time_t *restrict timer, struct tm *restrict result) +// CHECK: Loaded summary for: char *asctime_r(const struct tm *restrict tm, char *restrict buf) +// CHECK: Loaded summary for: char *ctime_r(const time_t *timep, char *buf) +// CHECK: Loaded summary for: struct tm *gmtime_r(const time_t *restrict timer, struct tm *restrict result) +// CHECK: Loaded summary for: struct tm *gmtime(const time_t *tp) +// CHECK: Loaded summary for: int clock_gettime(clockid_t clock_id, struct timespec *tp) +// CHECK: Loaded summary for: int getitimer(int which, struct itimerval *curr_value) +// CHECK: Loaded summary for: int pthread_cond_signal(pthread_cond_t *cond) +// CHECK: Loaded summary for: int pthread_cond_broadcast(pthread_cond_t *cond) +// CHECK: Loaded summary for: int pthread_create(pthread_t *restrict thread, const pthread_attr_t *restrict attr, void *(*start_routine)(void *), void *restrict arg) +// CHECK: Loaded summary for: int pthread_attr_destroy(pthread_attr_t *attr) +// CHECK: Loaded summary for: int pthread_attr_init(pthread_attr_t *attr) +// CHECK: Loaded summary for: int pthread_attr_getstacksize(const pthread_attr_t *restrict attr, size_t *restrict stacksize) +// CHECK: Loaded summary for: int pthread_attr_getguardsize(const pthread_attr_t *restrict attr, size_t *restrict guardsize) +// CHECK: Loaded summary for: int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) +// CHECK: Loaded summary for: int 
pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) +// CHECK: Loaded summary for: int pthread_mutex_init(pthread_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr) +// CHECK: Loaded summary for: int pthread_mutex_destroy(pthread_mutex_t *mutex) +// CHECK: Loaded summary for: int pthread_mutex_lock(pthread_mutex_t *mutex) +// CHECK: Loaded summary for: int pthread_mutex_trylock(pthread_mutex_t *mutex) +// CHECK: Loaded summary for: int pthread_mutex_unlock(pthread_mutex_t *mutex) long a64l(const char *str64); char *l64a(long value); @@ -226,6 +253,53 @@ int getsockopt(int socket, int level, int option_name, void *restrict option_val ssize_t send(int sockfd, const void *buf, size_t len, int flags); int socketpair(int domain, int type, int protocol, int sv[2]); int getnameinfo(const struct sockaddr *restrict sa, socklen_t salen, char *restrict node, socklen_t nodelen, char *restrict service, socklen_t servicelen, int flags); +struct utimbuf; +struct timespec { int x; }; +struct timeval { int x; }; +int utime(const char *filename, struct utimbuf *buf); +int futimens(int fd, const struct timespec times[2]); +int utimensat(int dirfd, const char *pathname, const struct timespec times[2], int flags); +int utimes(const char *filename, const struct timeval times[2]); +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp); +typedef unsigned long time_t; +struct tm *localtime(const time_t *tp); +struct tm *localtime_r(const time_t *restrict timer, struct tm *restrict result); +char *asctime_r(const struct tm *restrict tm, char *restrict buf); +char *ctime_r(const time_t *timep, char *buf); +struct tm *gmtime_r(const time_t *restrict timer, struct tm *restrict result); +struct tm *gmtime(const time_t *tp); +typedef unsigned long clockid_t; +int clock_gettime(clockid_t clock_id, struct timespec *tp); +struct itimerval; +int getitimer(int which, struct itimerval *curr_value); + +typedef union { + int x; +} pthread_cond_t; +int 
pthread_cond_signal(pthread_cond_t *cond); +int pthread_cond_broadcast(pthread_cond_t *cond); +typedef union { + int x; +} pthread_attr_t; +typedef unsigned long int pthread_t; +int pthread_create(pthread_t *restrict thread, const pthread_attr_t *restrict attr, void *(*start_routine)(void *), void *restrict arg); +int pthread_attr_destroy(pthread_attr_t *attr); +int pthread_attr_init(pthread_attr_t *attr); +int pthread_attr_getstacksize(const pthread_attr_t *restrict attr, size_t *restrict stacksize); +int pthread_attr_getguardsize(const pthread_attr_t *restrict attr, size_t *restrict guardsize); +int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize); +int pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize); +typedef union { + int x; +} pthread_mutex_t; +typedef union { + int x; +} pthread_mutexattr_t; +int pthread_mutex_init(pthread_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr); +int pthread_mutex_destroy(pthread_mutex_t *mutex); +int pthread_mutex_lock(pthread_mutex_t *mutex); +int pthread_mutex_trylock(pthread_mutex_t *mutex); +int pthread_mutex_unlock(pthread_mutex_t *mutex); // Must have at least one call expression to initialize the summary map. 
int bar(void); diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c index e926cd15384d1..28979abd43b58 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -256,6 +256,7 @@ void test_buf_size_symbolic_and_offset(int s) { // bugpath-note{{TRUE}} \ // bugpath-note{{'s' is <= 2}} } + int __buf_size_arg_constraint_mul(const void *, size_t, size_t); void test_buf_size_concrete_with_multiplication() { short buf[3]; // bugpath-note{{'buf' initialized here}} @@ -280,3 +281,13 @@ void test_buf_size_symbolic_and_offset_with_multiplication(size_t s) { // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} } + +// The minimum buffer size for this function is set to 10. +int __buf_size_arg_constraint_concrete(const void *); +void test_min_buf_size() { + char buf[9];// bugpath-note{{'buf' initialized here}} + __buf_size_arg_constraint_concrete(buf); // \ + // report-warning{{Function argument constraint is not satisfied}} \ + // bugpath-warning{{Function argument constraint is not satisfied}} \ + // bugpath-note{{Function argument constraint is not satisfied}} +} diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c new file mode 100644 index 0000000000000..9ad1be0538517 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c @@ -0,0 +1,66 @@ +// Here we test the order of the Checkers when StdCLibraryFunctionArgs is +// enabled. 
+ +// RUN: %clang --analyze %s --target=x86_64-pc-linux-gnu \ +// RUN: -Xclang -analyzer-checker=core \ +// RUN: -Xclang -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -Xclang -analyzer-config \ +// RUN: -Xclang apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -Xclang -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -Xclang -analyzer-checker=alpha.unix.Stream \ +// RUN: -Xclang -analyzer-list-enabled-checkers \ +// RUN: -Xclang -analyzer-display-progress \ +// RUN: 2>&1 | FileCheck %s --implicit-check-not=ANALYZE \ +// RUN: --implicit-check-not=\. + +// CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List +// CHECK-EMPTY: +// CHECK-NEXT: core.CallAndMessageModeling +// CHECK-NEXT: core.CallAndMessage +// CHECK-NEXT: core.NonNullParamChecker +// CHECK-NEXT: alpha.unix.Stream +// CHECK-NEXT: apiModeling.StdCLibraryFunctions +// CHECK-NEXT: alpha.unix.StdCLibraryFunctionArgs +// CHECK-NEXT: apiModeling.TrustNonnull +// CHECK-NEXT: apiModeling.llvm.CastValue +// CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.DivideZero +// CHECK-NEXT: core.DynamicTypePropagation +// CHECK-NEXT: core.NonnilStringConstants +// CHECK-NEXT: core.NullDereference +// CHECK-NEXT: core.StackAddrEscapeBase +// CHECK-NEXT: core.StackAddressEscape +// CHECK-NEXT: core.UndefinedBinaryOperatorResult +// CHECK-NEXT: core.VLASize +// CHECK-NEXT: core.builtin.BuiltinFunctions +// CHECK-NEXT: core.builtin.NoReturnFunctions +// CHECK-NEXT: core.uninitialized.ArraySubscript +// CHECK-NEXT: core.uninitialized.Assign +// CHECK-NEXT: core.uninitialized.Branch +// CHECK-NEXT: core.uninitialized.CapturedBlockVariable +// CHECK-NEXT: core.uninitialized.UndefReturn +// CHECK-NEXT: deadcode.DeadStores +// CHECK-NEXT: nullability.NullabilityBase +// CHECK-NEXT: nullability.NullPassedToNonnull +// CHECK-NEXT: nullability.NullReturnedFromNonnull +// CHECK-NEXT: security.insecureAPI.SecuritySyntaxChecker +// CHECK-NEXT: 
security.insecureAPI.UncheckedReturn +// CHECK-NEXT: security.insecureAPI.getpw +// CHECK-NEXT: security.insecureAPI.gets +// CHECK-NEXT: security.insecureAPI.mkstemp +// CHECK-NEXT: security.insecureAPI.mktemp +// CHECK-NEXT: security.insecureAPI.vfork +// CHECK-NEXT: unix.API +// CHECK-NEXT: unix.cstring.CStringModeling +// CHECK-NEXT: unix.DynamicMemoryModeling +// CHECK-NEXT: unix.Malloc +// CHECK-NEXT: unix.MallocSizeof +// CHECK-NEXT: unix.MismatchedDeallocator +// CHECK-NEXT: unix.Vfork +// CHECK-NEXT: unix.cstring.BadSizeArg +// CHECK-NEXT: unix.cstring.NullArg + +int main() { + int i; + (void)(10 / i); +} diff --git a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c new file mode 100644 index 0000000000000..0ad3c277dfd7d --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c @@ -0,0 +1,64 @@ +// Check that the more specific checkers report and not the generic +// StdCLibraryFunctionArgs checker. + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -triple x86_64-unknown-linux-gnu \ +// RUN: -verify + + +// Make sure that all used functions have their summary loaded. 
+ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple x86_64-unknown-linux 2>&1 | FileCheck %s + +// CHECK: Loaded summary for: int isalnum(int) +// CHECK: Loaded summary for: unsigned long fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))) +// CHECK: Loaded summary for: int fileno(FILE *stream) + +void initializeSummaryMap(); +// We analyze this function first, and the call expression inside initializes +// the summary map. This way we force the loading of the summaries. The +// summaries would not be loaded without this because during the first bug +// report in WeakDependency::checkPreCall we stop further evaluation. And +// StdLibraryFunctionsChecker lazily initializes its summary map from its +// checkPreCall. 
+void analyzeThisFirst() { + initializeSummaryMap(); +} + +typedef __typeof(sizeof(int)) size_t; +struct FILE; +typedef struct FILE FILE; + +int isalnum(int); +size_t fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))); +int fileno(FILE *stream); + +void test_uninit_arg() { + int v; + int r = isalnum(v); // \ + // expected-warning{{1st function call argument is an uninitialized value [core.CallAndMessage]}} + (void)r; +} + +void test_notnull_arg(FILE *F) { + int *p = 0; + fread(p, sizeof(int), 5, F); // \ + expected-warning{{Null pointer passed to 1st parameter expecting 'nonnull' [core.NonNullParamChecker]}} +} + +void test_notnull_stream_arg() { + fileno(0); // \ + // expected-warning{{Stream pointer might be NULL [alpha.unix.Stream]}} +} diff --git a/clang/test/Analysis/std-c-library-functions-restrict.c b/clang/test/Analysis/std-c-library-functions-restrict.c new file mode 100644 index 0000000000000..7cf5f2bc630a3 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-restrict.c @@ -0,0 +1,24 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s + +// The signatures for these functions are the same and they specify their +// parameter with the restrict qualifier. In C, the signature should match only +// if the restrict qualifier is there on the parameter. Thus, the summary +// should be loaded for the last two declarations only. 
+void __test_restrict_param_0(void *p); +void __test_restrict_param_1(void *__restrict p); +void __test_restrict_param_2(void *restrict p); + +// CHECK-NOT: Loaded summary for: void __test_restrict_param_0 +// CHECK: Loaded summary for: void __test_restrict_param_1(void *restrict p) +// CHECK: Loaded summary for: void __test_restrict_param_2(void *restrict p) + +// Must have at least one call expression to initialize the summary map. +int bar(void); +void foo() { + bar(); +} diff --git a/clang/test/Analysis/std-c-library-functions-restrict.cpp b/clang/test/Analysis/std-c-library-functions-restrict.cpp new file mode 100644 index 0000000000000..d1cd090f5ef85 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-restrict.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s + +// The signatures for these functions are the same and they specify their +// parameter with the restrict qualifier. In C++, however, we are more +// indulgent and we do not match based on this qualifier. Thus, the given +// signature should match for both of the declarations below, i.e the summary +// should be loaded for both of them. +void __test_restrict_param_0(void *p); +void __test_restrict_param_1(void *__restrict p); +// The below declaration is illegal, "restrict" is not a keyword in C++. +// void __test_restrict_param_2(void *restrict p); + +// CHECK: Loaded summary for: void __test_restrict_param_0(void *p) +// CHECK: Loaded summary for: void __test_restrict_param_1(void *__restrict p) + +// Must have at least one call expression to initialize the summary map. 
+int bar(void); +void foo() { + bar(); +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c index 03ab37474ba02..a656657b66197 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include @@ -9,8 +9,8 @@ // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <16 x i8> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[TMP2]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[A]], <16 x i8> [[TMP2]] // CHECK-NEXT: ret <16 x i8> [[TMP4]] // uint8x16_t test_vmaxaq_s8(uint8x16_t a, int8x16_t b) @@ -27,8 +27,8 @@ uint8x16_t 
test_vmaxaq_s8(uint8x16_t a, int8x16_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i16> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <8 x i16> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP2]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[A]], <8 x i16> [[TMP2]] // CHECK-NEXT: ret <8 x i16> [[TMP4]] // uint16x8_t test_vmaxaq_s16(uint16x8_t a, int16x8_t b) @@ -45,8 +45,8 @@ uint16x8_t test_vmaxaq_s16(uint16x8_t a, int16x8_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[A]], <4 x i32> [[TMP2]] // CHECK-NEXT: ret <4 x i32> [[TMP4]] // uint32x4_t test_vmaxaq_s32(uint32x4_t a, int32x4_t b) @@ -61,8 +61,8 @@ uint32x4_t test_vmaxaq_s32(uint32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vmaxaq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.vmaxa.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmaxa.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmaxaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) @@ -77,8 +77,8 @@ uint8x16_t test_vmaxaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxaq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.vmaxa.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmaxa.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmaxaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) @@ -93,8 +93,8 @@ uint16x8_t test_vmaxaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxaq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.vmaxa.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmaxa.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmaxaq_m_s32(uint32x4_t a, int32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c index 20c22056d52a5..52b439fe5555f 100644 --- 
a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c @@ -1,14 +1,14 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vmaxnmaq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) @@ -22,9 +22,9 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vmaxnmaq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) @@ -39,8 +39,8 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vmaxnmaq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -55,8 +55,8 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxnmaq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c index 5caf8d6421feb..19b5d28a52440 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vmaxnmq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) @@ -20,7 +20,7 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) // 
CHECK-LABEL: @test_vmaxnmq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) @@ -35,8 +35,8 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vmaxnmq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -51,8 +51,8 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t // CHECK-LABEL: @test_vmaxnmq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], 
i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) @@ -67,8 +67,8 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t // CHECK-LABEL: @test_vmaxnmq_x_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -83,8 +83,8 @@ float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxnmq_x_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c index d0ddc7a99e9f8..7fb2f5191f440 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vmaxq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[B]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) @@ -21,8 +21,8 @@ int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) // CHECK-LABEL: @test_vmaxq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <8 x i16> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[B:%.*]] 
+// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[B]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) @@ -36,8 +36,8 @@ uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) // CHECK-LABEL: @test_vmaxq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) @@ -52,8 +52,8 @@ int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vmaxq_m_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) @@ -68,8 +68,8 @@ uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_ // CHECK-LABEL: @test_vmaxq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x 
i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) @@ -84,8 +84,8 @@ int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pre // CHECK-LABEL: @test_vmaxq_m_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) @@ -100,8 +100,8 @@ uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve // CHECK-LABEL: @test_vmaxq_x_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x 
i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmaxq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) @@ -116,8 +116,8 @@ uint8x16_t test_vmaxq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxq_x_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmaxq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) @@ -132,8 +132,8 @@ uint16x8_t test_vmaxq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxq_x_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmaxq_x_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c index b23dc1a27be86..6a6279cce0df2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include @@ -9,8 +9,8 @@ // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ult <16 x i8> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[TMP2]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ule <16 x i8> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[A]], <16 x i8> [[TMP2]] // CHECK-NEXT: ret <16 x i8> [[TMP4]] // uint8x16_t test_vminaq_s8(uint8x16_t a, int8x16_t b) @@ -27,8 +27,8 @@ uint8x16_t test_vminaq_s8(uint8x16_t a, int8x16_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i16> [[B:%.*]], zeroinitializer // 
CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i16> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP2]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ule <8 x i16> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[A]], <8 x i16> [[TMP2]] // CHECK-NEXT: ret <8 x i16> [[TMP4]] // uint16x8_t test_vminaq_s16(uint16x8_t a, int16x8_t b) @@ -45,8 +45,8 @@ uint16x8_t test_vminaq_s16(uint16x8_t a, int16x8_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[A]], <4 x i32> [[TMP2]] // CHECK-NEXT: ret <4 x i32> [[TMP4]] // uint32x4_t test_vminaq_s32(uint32x4_t a, int32x4_t b) @@ -61,8 +61,8 @@ uint32x4_t test_vminaq_s32(uint32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vminaq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.vmina.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmina.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) // CHECK-NEXT: ret 
<16 x i8> [[TMP2]] // uint8x16_t test_vminaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) @@ -77,8 +77,8 @@ uint8x16_t test_vminaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminaq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.vmina.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmina.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vminaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) @@ -93,8 +93,8 @@ uint16x8_t test_vminaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminaq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.vmina.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmina.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vminaq_m_s32(uint32x4_t a, int32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c index b13d851aec79b..5ddc3914f1857 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c @@ -1,14 +1,14 @@ // NOTE: Assertions have 
been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vminnmaq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) @@ -22,9 +22,9 @@ float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vminnmaq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) @@ -39,8 +39,8 @@ float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vminnmaq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -55,8 +55,8 @@ float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminnmaq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], 
<4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c index 7ebcf45d88330..0723dfae2f064 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vminnmq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) @@ -20,7 +20,7 @@ float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vminnmq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x 
float> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) @@ -35,8 +35,8 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vminnmq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -51,8 +51,8 @@ float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t // CHECK-LABEL: @test_vminnmq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_m_f32(float32x4_t inactive, 
float32x4_t a, float32x4_t b, mve_pred16_t p) @@ -67,8 +67,8 @@ float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t // CHECK-LABEL: @test_vminnmq_x_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -83,8 +83,8 @@ float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminnmq_x_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c index d4186858b121a..1f3b0d670ee17 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c +++ 
b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vminq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[B]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) @@ -21,8 +21,8 @@ uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) // CHECK-LABEL: @test_vminq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[B]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // int16x8_t 
test_vminq_s16(int16x8_t a, int16x8_t b) @@ -36,8 +36,8 @@ int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) // CHECK-LABEL: @test_vminq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) @@ -52,8 +52,8 @@ uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) // CHECK-LABEL: @test_vminq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) @@ -68,8 +68,8 @@ int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred // CHECK-LABEL: @test_vminq_m_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) @@ -84,8 +84,8 @@ uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve // CHECK-LABEL: @test_vminq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) @@ -100,8 +100,8 @@ int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pre // CHECK-LABEL: @test_vminq_x_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) // 
CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vminq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) @@ -116,8 +116,8 @@ uint8x16_t test_vminq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminq_x_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vminq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) @@ -132,8 +132,8 @@ int16x8_t test_vminq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminq_x_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vminq_x_s32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index f6b8b1be1e76b..cab424c3dbe17 100644 --- 
a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -6,7 +6,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c index 412923f1e898e..490ec92dfdeb5 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -4,7 +4,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index 6c7edf9033f76..13d8f14f991a8 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -4,7 +4,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c index d93be54a499cb..1a6a68a2e1f4f 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c @@ -3,7 +3,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); diff --git 
a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index be0b314334b9d..d567c718000c8 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -5,7 +5,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c index 625e096bf3d6f..a1cfc514081ea 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c @@ -7,7 +7,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index 95659895eeaf9..f3de6d1b87474 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -8,25 +8,19 @@ __m256i test_mm256_abs_epi8(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, %{{.*}} - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> %{{.*}}, zeroinitializer - // CHECK: select <32 x i1> [[CMP]], <32 x i8> %{{.*}}, <32 x i8> [[SUB]] + // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) return _mm256_abs_epi8(a); } __m256i test_mm256_abs_epi16(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, %{{.*}} - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> %{{.*}}, zeroinitializer - // CHECK: select <16 x i1> [[CMP]], <16 x i16> %{{.*}}, <16 x i16> [[SUB]] + // CHECK: 
[[ABS:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) return _mm256_abs_epi16(a); } __m256i test_mm256_abs_epi32(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, %{{.*}} - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> %{{.*}}, zeroinitializer - // CHECK: select <8 x i1> [[CMP]], <8 x i32> %{{.*}}, <8 x i32> [[SUB]] + // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) return _mm256_abs_epi32(a); } diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c index c08b354d9519b..cc173f1a9cfe6 100644 --- a/clang/test/CodeGen/avx512bw-builtins.c +++ b/clang/test/CodeGen/avx512bw-builtins.c @@ -878,48 +878,36 @@ __m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { } __m512i test_mm512_abs_epi8(__m512i __A) { // CHECK-LABEL: @test_mm512_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <64 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], zeroinitializer - // CHECK: select <64 x i1> [[CMP]], <64 x i8> [[A]], <64 x i8> [[SUB]] + // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) return _mm512_abs_epi8(__A); } __m512i test_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <64 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[A]], <64 x i8> [[SUB]] - // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[SEL]], <64 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[ABS]], <64 x i8> %{{.*}} return _mm512_mask_abs_epi8(__W,__U,__A); } __m512i test_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = 
sub <64 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[A]], <64 x i8> [[SUB]] - // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[SEL]], <64 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[ABS]], <64 x i8> %{{.*}} return _mm512_maskz_abs_epi8(__U,__A); } __m512i test_mm512_abs_epi16(__m512i __A) { // CHECK-LABEL: @test_mm512_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <32 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], zeroinitializer - // CHECK: select <32 x i1> [[CMP]], <32 x i16> [[A]], <32 x i16> [[SUB]] + // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) return _mm512_abs_epi16(__A); } __m512i test_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <32 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[A]], <32 x i16> [[SUB]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[SEL]], <32 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[ABS]], <32 x i16> %{{.*}} return _mm512_mask_abs_epi16(__W,__U,__A); } __m512i test_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <32 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[A]], <32 x i16> [[SUB]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[SEL]], <32 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x 
i16> %{{.*}}, i1 false) + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[ABS]], <32 x i16> %{{.*}} return _mm512_maskz_abs_epi16(__U,__A); } __m512i test_mm512_packs_epi32(__m512i __A, __m512i __B) { diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index b01300c424b72..fb5db4c321748 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -10467,44 +10467,36 @@ __m512 test_mm512_set_ps (float __A, float __B, float __C, float __D, __m512i test_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <8 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[A]], <8 x i64> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[SEL]], <8 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_mask_abs_epi64 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <8 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[A]], <8 x i64> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[SEL]], <8 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_maskz_abs_epi64 (__U,__A); } __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], 
zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> - // CHECK: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[ABS]] to <8 x i64> + // CHECK: [[ABS:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[ABS]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> - // CHECK: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[ABS]] to <8 x i64> + // CHECK: [[ABS:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[ABS]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index 3348e05790bcf..e7965119fb4b9 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -4537,90 +4537,68 @@ __m256 test_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { } __m128i test_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi32 - // CHECK: 
[[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> - // CHECK: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[ABS]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } __m128i test_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> - // CHECK: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[ABS]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } __m256i test_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 
x i64> - // CHECK: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[ABS]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> - // CHECK: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[ABS]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } __m128i test_mm_abs_epi64(__m128i __A) { // CHECK-LABEL: @test_mm_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - + // CHECK: [[ABS:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) return _mm_abs_epi64(__A); } __m128i test_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // 
CHECK: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[SEL]], <2 x i64> %{{.*}} - + // CHECK: [[ABS:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[ABS]], <2 x i64> %{{.*}} return _mm_mask_abs_epi64(__W,__U,__A); } __m128i test_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[SEL]], <2 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[ABS]], <2 x i64> %{{.*}} return _mm_maskz_abs_epi64(__U,__A); } __m256i test_mm256_abs_epi64(__m256i __A) { // CHECK-LABEL: @test_mm256_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] + // CHECK: [[ABS:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) return _mm256_abs_epi64(__A); } __m256i test_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[SEL]], <4 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[ABS]], <4 x i64> %{{.*}} return _mm256_mask_abs_epi64(__W,__U,__A); } __m256i 
test_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[SEL]], <4 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[ABS]], <4 x i64> %{{.*}} return _mm256_maskz_abs_epi64(__U,__A); } __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c index 0cbd8a1a595fb..df2adfdb97be6 100644 --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -897,89 +897,73 @@ __m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i8> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <16 x i8> + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } __m128i test_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi8 - 
// CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i8> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <16 x i8> + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } __m256i test_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <32 x i8> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <32 x i8> + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[ABS]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> 
[[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <32 x i8> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <32 x i8> + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[ABS]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } __m128i test_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i16> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <8 x i16> + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[ABS]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } __m128i test_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> 
%{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i16> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <8 x i16> + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[ABS]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } __m256i test_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i16> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <16 x i16> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[ABS]], <16 x i16> %{{.*}} return _mm256_mask_abs_epi16(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i16> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = 
bitcast <4 x i64> [[TMP]] to <16 x i16> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[ABS]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c index d53011b37d413..06f70a9019039 100644 --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -38,6 +38,13 @@ vector float res_vf; // CHECK-NOALTIVEC: error: unknown type name 'vector' // CHECK-NOALTIVEC-NOT: '(error)' +const signed char *param_sc_ld; +const unsigned char *param_uc_ld; +const short *param_s_ld; +const unsigned short *param_us_ld; +const int *param_i_ld; +const unsigned int *param_ui_ld; +const float *param_f_ld; signed char param_sc; unsigned char param_uc; @@ -1029,6 +1036,85 @@ void test2() { // CHECK: @llvm.ppc.altivec.vcmpeqfp // CHECK-LE: @llvm.ppc.altivec.vcmpeqfp + /* vec_cmpne */ + res_vbc = vec_cmpne(vsc, vsc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vuc, vuc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vbc, vbc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vbc, vbc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbs = vec_cmpne(vs, vs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vus, vus); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vbs, vbs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vbs, vbs); +// CHECK: 
@llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbi = vec_cmpne(vi, vi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vui, vui); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vbi, vbi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vbi, vbi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vf, vf); +// CHECK: @llvm.ppc.altivec.vcmpeqfp +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpeqfp +// CHECK-LE: xor + /* vec_cmpge */ res_vbc = vec_cmpge(vsc, vsc); // CHECK: @llvm.ppc.altivec.vcmpgtsb @@ -1313,7 +1399,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vsc = vec_ld(0, ¶m_sc); + res_vsc = vec_ld(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1321,7 +1407,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vuc = vec_ld(0, ¶m_uc); + res_vuc = vec_ld(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1333,7 +1419,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vs = vec_ld(0, ¶m_s); + res_vs = vec_ld(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1341,7 +1427,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vus = vec_ld(0, ¶m_us); + res_vus = vec_ld(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1357,7 +1443,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vi = vec_ld(0, ¶m_i); + 
res_vi = vec_ld(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1365,7 +1451,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vui = vec_ld(0, ¶m_ui); + res_vui = vec_ld(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1377,7 +1463,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vf = vec_ld(0, ¶m_f); + res_vf = vec_ld(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1385,7 +1471,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vsc = vec_lvx(0, ¶m_sc); + res_vsc = vec_lvx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1393,7 +1479,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vuc = vec_lvx(0, ¶m_uc); + res_vuc = vec_lvx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1405,7 +1491,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vs = vec_lvx(0, ¶m_s); + res_vs = vec_lvx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1413,7 +1499,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vus = vec_lvx(0, ¶m_us); + res_vus = vec_lvx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1429,7 +1515,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vi = vec_lvx(0, ¶m_i); + res_vi = vec_lvx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1437,7 +1523,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vui = vec_lvx(0, ¶m_ui); + res_vui = vec_lvx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ 
-1449,64 +1535,64 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vf = vec_lvx(0, ¶m_f); + res_vf = vec_lvx(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx /* vec_lde */ - res_vsc = vec_lde(0, ¶m_sc); + res_vsc = vec_lde(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vuc = vec_lde(0, ¶m_uc); + res_vuc = vec_lde(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vs = vec_lde(0, ¶m_s); + res_vs = vec_lde(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vus = vec_lde(0, ¶m_us); + res_vus = vec_lde(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vi = vec_lde(0, ¶m_i); + res_vi = vec_lde(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vui = vec_lde(0, ¶m_ui); + res_vui = vec_lde(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vf = vec_lde(0, ¶m_f); + res_vf = vec_lde(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vsc = vec_lvebx(0, ¶m_sc); + res_vsc = vec_lvebx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vuc = vec_lvebx(0, ¶m_uc); + res_vuc = vec_lvebx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vs = vec_lvehx(0, ¶m_s); + res_vs = vec_lvehx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vus = vec_lvehx(0, ¶m_us); + res_vus = vec_lvehx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vi = vec_lvewx(0, ¶m_i); + res_vi = vec_lvewx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vui = vec_lvewx(0, ¶m_ui); + res_vui = vec_lvewx(0, param_ui_ld); // CHECK: 
@llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vf = vec_lvewx(0, ¶m_f); + res_vf = vec_lvewx(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx @@ -1515,7 +1601,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vsc = vec_ldl(0, ¶m_sc); + res_vsc = vec_ldl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1523,7 +1609,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vuc = vec_ldl(0, ¶m_uc); + res_vuc = vec_ldl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1535,7 +1621,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vs = vec_ldl(0, ¶m_s); + res_vs = vec_ldl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1543,7 +1629,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vus = vec_ldl(0, ¶m_us); + res_vus = vec_ldl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1559,7 +1645,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vi = vec_ldl(0, ¶m_i); + res_vi = vec_ldl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1567,7 +1653,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vui = vec_ldl(0, ¶m_ui); + res_vui = vec_ldl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1579,7 +1665,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vf = vec_ldl(0, ¶m_f); + res_vf = vec_ldl(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1587,7 +1673,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - 
res_vsc = vec_lvxl(0, ¶m_sc); + res_vsc = vec_lvxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1599,7 +1685,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vuc = vec_lvxl(0, ¶m_uc); + res_vuc = vec_lvxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1607,7 +1693,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vs = vec_lvxl(0, ¶m_s); + res_vs = vec_lvxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1615,7 +1701,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vus = vec_lvxl(0, ¶m_us); + res_vus = vec_lvxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1631,7 +1717,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vi = vec_lvxl(0, ¶m_i); + res_vi = vec_lvxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1639,7 +1725,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vui = vec_lvxl(0, ¶m_ui); + res_vui = vec_lvxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1651,7 +1737,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vf = vec_lvxl(0, ¶m_f); + res_vf = vec_lvxl(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1665,12 +1751,12 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vlogefp /* vec_lvsl */ - res_vuc = vec_lvsl(0, ¶m_i); + res_vuc = vec_lvsl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.lvsl /* vec_lvsr */ - res_vuc = vec_lvsr(0, ¶m_i); + res_vuc = vec_lvsr(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvsr // CHECK-LE: @llvm.ppc.altivec.lvsr @@ -6029,7 +6115,7 @@ 
void test6() { // CHECK-LE: insertelement <4 x float> /* vec_lvlx */ - res_vsc = vec_lvlx(0, ¶m_sc); + res_vsc = vec_lvlx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6049,7 +6135,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvlx(0, ¶m_uc); + res_vuc = vec_lvlx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6079,7 +6165,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvlx(0, ¶m_s); + res_vs = vec_lvlx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6099,7 +6185,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvlx(0, ¶m_us); + res_vus = vec_lvlx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6139,7 +6225,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvlx(0, ¶m_i); + res_vi = vec_lvlx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6159,7 +6245,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvlx(0, ¶m_ui); + res_vui = vec_lvlx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6200,7 +6286,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvlxl */ - res_vsc = vec_lvlxl(0, ¶m_sc); + res_vsc = vec_lvlxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6220,7 +6306,7 @@ void 
test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvlxl(0, ¶m_uc); + res_vuc = vec_lvlxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6250,7 +6336,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvlxl(0, ¶m_s); + res_vs = vec_lvlxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6270,7 +6356,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvlxl(0, ¶m_us); + res_vus = vec_lvlxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6310,7 +6396,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvlxl(0, ¶m_i); + res_vi = vec_lvlxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6330,7 +6416,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvlxl(0, ¶m_ui); + res_vui = vec_lvlxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6371,7 +6457,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvrx */ - res_vsc = vec_lvrx(0, ¶m_sc); + res_vsc = vec_lvrx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6391,7 +6477,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvrx(0, ¶m_uc); + res_vuc = vec_lvrx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> 
zeroinitializer @@ -6421,7 +6507,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvrx(0, ¶m_s); + res_vs = vec_lvrx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6441,7 +6527,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvrx(0, ¶m_us); + res_vus = vec_lvrx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6481,7 +6567,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvrx(0, ¶m_i); + res_vi = vec_lvrx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6501,7 +6587,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvrx(0, ¶m_ui); + res_vui = vec_lvrx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6542,7 +6628,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvrxl */ - res_vsc = vec_lvrxl(0, ¶m_sc); + res_vsc = vec_lvrxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6562,7 +6648,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvrxl(0, ¶m_uc); + res_vuc = vec_lvrxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6592,7 +6678,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvrxl(0, ¶m_s); + res_vs = vec_lvrxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: 
store <8 x i16> zeroinitializer @@ -6612,7 +6698,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvrxl(0, ¶m_us); + res_vus = vec_lvrxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6652,7 +6738,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvrxl(0, ¶m_i); + res_vi = vec_lvrxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6672,7 +6758,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvrxl(0, ¶m_ui); + res_vui = vec_lvrxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -9354,31 +9440,31 @@ void test8() { void test9() { // CHECK-LABEL: define void @test9 // CHECK-LE-LABEL: define void @test9 - res_vsc = vec_xl(param_sll, ¶m_sc); + res_vsc = vec_xl(param_sll, param_sc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 - res_vuc = vec_xl(param_sll, ¶m_uc); + res_vuc = vec_xl(param_sll, param_uc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 - res_vs = vec_xl(param_sll, ¶m_s); + res_vs = vec_xl(param_sll, param_s_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 - res_vus = vec_xl(param_sll, ¶m_us); + res_vus = vec_xl(param_sll, param_us_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 - res_vi = vec_xl(param_sll, ¶m_i); + res_vi = vec_xl(param_sll, param_i_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 
x i32>, <4 x i32>* %{{[0-9]+}}, align 1 - res_vui = vec_xl(param_sll, ¶m_ui); + res_vui = vec_xl(param_sll, param_ui_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 - res_vf = vec_xl(param_sll, ¶m_f); + res_vf = vec_xl(param_sll, param_f_ld); // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 } @@ -9420,35 +9506,35 @@ void test10() { void test11() { // CHECK-LABEL: define void @test11 // CHECK-LE-LABEL: define void @test11 - res_vsc = vec_xl_be(param_sll, ¶m_sc); + res_vsc = vec_xl_be(param_sll, param_sc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> - res_vuc = vec_xl_be(param_sll, ¶m_uc); + res_vuc = vec_xl_be(param_sll, param_uc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> - res_vs = vec_xl_be(param_sll, ¶m_s); + res_vs = vec_xl_be(param_sll, param_s_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> - res_vus = vec_xl_be(param_sll, ¶m_us); + res_vus = vec_xl_be(param_sll, param_us_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> - res_vi = vec_xl_be(param_sll, ¶m_i); + res_vi = vec_xl_be(param_sll, param_i_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) - 
res_vui = vec_xl_be(param_sll, ¶m_ui); + res_vui = vec_xl_be(param_sll, param_ui_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) - res_vf = vec_xl_be(param_sll, ¶m_f); + res_vf = vec_xl_be(param_sll, param_f_ld); // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) } diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index 6fe6d9fdf72d6..ad63d646196c3 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -23,11 +23,15 @@ vector double vda, vdb; signed int *iap; unsigned int uia, uib, *uiap; signed char *cap; -unsigned char uca, *ucap; -signed short *sap; -unsigned short usa, *usap; -signed long long *llap, llb; -unsigned long long ulla, *ullap; +unsigned char uca; +const unsigned char *ucap; +const signed short *sap; +unsigned short usa; +const unsigned short *usap; +const signed long long *llap; +signed long long llb; +unsigned long long ulla; +const unsigned long long *ullap; vector signed long long test_vec_mul_sll(void) { // CHECK: mul <2 x i64> @@ -197,6 +201,36 @@ vector unsigned long long test_vcfuged(void) { return vec_cfuge(vulla, vullb); } +vector unsigned char test_vec_expandm_uc(void) { + // CHECK: @llvm.ppc.altivec.vexpandbm(<16 x i8> %{{.+}}) + // CHECK-NEXT: ret <16 x i8> + return vec_expandm(vuca); +} + +vector unsigned short test_vec_expandm_us(void) { + // CHECK: @llvm.ppc.altivec.vexpandhm(<8 x i16> %{{.+}}) + // CHECK-NEXT: ret <8 x i16> + return vec_expandm(vusa); +} + +vector unsigned int test_vec_expandm_ui(void) { + // CHECK: @llvm.ppc.altivec.vexpandwm(<4 x i32> %{{.+}}) + // CHECK-NEXT: ret <4 x i32> + return vec_expandm(vuia); +} + +vector unsigned long long test_vec_expandm_ull(void) { + // CHECK: @llvm.ppc.altivec.vexpanddm(<2 x i64> %{{.+}}) + // CHECK-NEXT: ret <2 x 
i64> + return vec_expandm(vulla); +} + +vector unsigned __int128 test_vec_expandm_u128(void) { + // CHECK: @llvm.ppc.altivec.vexpandqm(<1 x i128> %{{.+}}) + // CHECK-NEXT: ret <1 x i128> + return vec_expandm(vui128a); +} + unsigned long long test_vgnb_1(void) { // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 2) // CHECK-NEXT: ret i64 @@ -928,6 +962,44 @@ int test_vec_test_lsbb_all_zeros(void) { return vec_test_lsbb_all_zeros(vuca); } +vector unsigned __int128 test_vec_mule_u128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmuleud(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmuloud(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mule(vulla, vullb); +} + +vector signed __int128 test_vec_mule_s128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmulesd(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmulosd(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mule(vslla, vsllb); +} + +vector unsigned __int128 test_vec_mulo_u128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmuloud(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmuleud(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mulo(vulla, vullb); +} + +vector signed __int128 test_vec_mulo_s128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmulosd(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmulesd(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mulo(vslla, vsllb); +} + +vector unsigned __int128 test_vec_msumc_u128(void) { + // CHECK: @llvm.ppc.altivec.vmsumcud(<2 x i64> + // CHECK-NEXT: ret <1 x i128> + return vec_msumc(vulla, vullb, vui128a); +} + vector signed __int128 test_vec_xl_sext_i8(void) { // CHECK: load i8 // CHECK: sext i8 diff --git a/clang/test/CodeGen/builtins-ppc-xl-xst.c b/clang/test/CodeGen/builtins-ppc-xl-xst.c index 8ad45376e9779..226e9d8aff4e6 100644 --- a/clang/test/CodeGen/builtins-ppc-xl-xst.c +++ b/clang/test/CodeGen/builtins-ppc-xl-xst.c 
@@ -17,10 +17,12 @@ // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i16*, align 8 // CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 @@ -35,7 +37,7 @@ // CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8 // CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 @@ -50,9 +52,9 @@ // CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test1(vector signed short *c, signed short *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test1(vector signed short *c, signed short *st, const signed short *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test2( @@ -65,10 +67,12 @@ void test1(vector signed 
short *c, signed short *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i16*, align 8 // CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 @@ -83,7 +87,7 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8 // CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 @@ -98,9 +102,10 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test2(vector unsigned short *c, unsigned short *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test2(vector unsigned short *c, unsigned short *st, + const unsigned 
short *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test3( @@ -113,10 +118,12 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 @@ -131,7 +138,7 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 @@ -146,9 +153,9 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test3(vector signed int *c, 
signed int *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test3(vector signed int *c, signed int *st, const signed int *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test4( @@ -161,10 +168,12 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 @@ -179,7 +188,7 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 @@ -194,9 +203,9 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: store <4 x i32> 
[[TMP14]], <4 x i32>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test4(vector unsigned int *c, unsigned int *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test4(vector unsigned int *c, unsigned int *st, const unsigned int *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test5( @@ -209,10 +218,12 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i64*, align 8 // CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 @@ -227,7 +238,7 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i64* [[TMP10]], i64** 
[[__PTR_ADDR_I2]], align 8 @@ -242,9 +253,10 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test5(vector signed long long *c, signed long long *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test5(vector signed long long *c, signed long long *st, + const signed long long *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test6( @@ -257,10 +269,12 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i64*, align 8 // CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 @@ -275,7 +289,7 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8 // CHECK-NEXT: store 
<2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 @@ -290,9 +304,10 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test6(vector unsigned long long *c, unsigned long long *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test6(vector unsigned long long *c, unsigned long long *st, + const unsigned long long *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test7( @@ -305,10 +320,12 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x float>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: store <4 x float>* [[C:%.*]], <4 x float>** [[C_ADDR]], align 8 -// CHECK-NEXT: store float* [[PTR:%.*]], float** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store float* [[ST:%.*]], float** [[ST_ADDR]], align 8 +// CHECK-NEXT: store float* [[LD:%.*]], float** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store float* [[TMP0]], float** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[__PTR_ADDR_I]], align 8 @@ -323,7 +340,7 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 
8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store float* [[TMP10]], float** [[__PTR_ADDR_I2]], align 8 @@ -338,9 +355,9 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test7(vector float *c, float *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test7(vector float *c, float *st, const float *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test8( @@ -353,10 +370,12 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x double>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca double*, align 8 // CHECK-NEXT: store <2 x double>* [[C:%.*]], <2 x double>** [[C_ADDR]], align 8 -// CHECK-NEXT: store double* [[PTR:%.*]], double** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store double* [[ST:%.*]], double** [[ST_ADDR]], align 8 +// CHECK-NEXT: store double* [[LD:%.*]], double** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store double* [[TMP0]], double** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[__PTR_ADDR_I]], align 8 @@ -371,7 +390,7 @@ void test7(vector float *c, 
float *ptr) { // CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store double* [[TMP10]], double** [[__PTR_ADDR_I2]], align 8 @@ -386,9 +405,9 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test8(vector double *c, double *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test8(vector double *c, double *st, const double *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } #ifdef __POWER8_VECTOR__ @@ -402,10 +421,12 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 -// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[ST_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[LD_ADDR:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 -// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 -// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8 // CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-P8-NEXT: store i128* 
[[TMP0]], i128** [[__PTR_ADDR_I]], align 8 // CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 @@ -420,7 +441,7 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 // CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 // CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 -// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8 // CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 // CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 @@ -435,9 +456,10 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 // CHECK-P8-NEXT: ret void // -void test9(vector signed __int128 *c, signed __int128 *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test9(vector signed __int128 *c, signed __int128 *st, + const signed __int128 *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-P8-LABEL: @test10( @@ -450,10 +472,12 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 -// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[ST_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[LD_ADDR:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 -// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 -// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8 
+// CHECK-P8-NEXT: store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8 // CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 // CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 @@ -468,7 +492,7 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 // CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 // CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 -// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8 // CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 // CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 @@ -483,8 +507,9 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 // CHECK-P8-NEXT: ret void // -void test10(vector unsigned __int128 *c, unsigned __int128 *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test10(vector unsigned __int128 *c, unsigned __int128 *st, + const unsigned __int128 *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } #endif diff --git a/clang/test/CodeGen/ssse3-builtins.c b/clang/test/CodeGen/ssse3-builtins.c index b89955fdc0880..d72ca9dd5b41c 100644 --- a/clang/test/CodeGen/ssse3-builtins.c +++ b/clang/test/CodeGen/ssse3-builtins.c @@ -7,25 +7,19 @@ __m128i test_mm_abs_epi8(__m128i a) { // CHECK-LABEL: test_mm_abs_epi8 - // CHECK: [[SUB:%.+]] = sub <16 x i8> zeroinitializer, [[A:%.+]] - // CHECK: [[CMP:%.+]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: %{{.*}} = select <16 x i1> 
[[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) return _mm_abs_epi8(a); } __m128i test_mm_abs_epi16(__m128i a) { // CHECK-LABEL: test_mm_abs_epi16 - // CHECK: [[SUB:%.+]] = sub <8 x i16> zeroinitializer, [[A:%.+]] - // CHECK: [[CMP:%.+]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: %{{.*}} = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) return _mm_abs_epi16(a); } __m128i test_mm_abs_epi32(__m128i a) { // CHECK-LABEL: test_mm_abs_epi32 - // CHECK: [[SUB:%.+]] = sub <4 x i32> zeroinitializer, [[A:%.+]] - // CHECK: [[CMP:%.+]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: %{{.*}} = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) return _mm_abs_epi32(a); } diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index 7a9fd2527272a..5415bddffc899 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -39,7 +39,7 @@ __global__ void num_vgpr_64() { // NAMD-NOT: "amdgpu-num-vgpr" // NAMD-NOT: "amdgpu-num-sgpr" -// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu 
index 6066469f76470..135d3030480c6 100644 --- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu +++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -39,4 +39,4 @@ int main() { launch((void*)D.Empty()); return 0; } -// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256" diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp index cb001cd06e02e..12550396d0fe7 100644 --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp @@ -14,7 +14,7 @@ // RUN: -target-feature +sve -target-feature +bf16 -msve-vector-bits=2048 \ // RUN: | FileCheck %s --check-prefix=CHECK-2048 -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef __SVInt8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); typedef __SVInt16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp index 7308aa6ae7a45..e9e15d6e0c4e5 100644 --- a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp @@ -14,7 +14,7 @@ // RUN: -target-feature +sve -target-feature +bf16 -msve-vector-bits=2048 \ // RUN: | FileCheck %s --check-prefix=CHECK-2048 -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS namespace std { class type_info; diff --git a/clang/test/CodeGenCXX/debug-info-class.cpp b/clang/test/CodeGenCXX/debug-info-class.cpp index 94d5a0f1f0820..e000532b8c3b1 100644 --- a/clang/test/CodeGenCXX/debug-info-class.cpp +++ b/clang/test/CodeGenCXX/debug-info-class.cpp @@ -136,7 +136,7 @@ int main(int argc, char **argv) { // CHECK: [[C_DTOR]] = !DISubprogram(name: "~C" // CHECK: [[D:![0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "D" -// 
CHECK-NOT: size: +// CHECK-SAME: size: // CHECK-SAME: DIFlagFwdDecl // CHECK-NOT: identifier: // CHECK-SAME: ){{$}} diff --git a/clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp b/clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp new file mode 100644 index 0000000000000..57e6dea72e21f --- /dev/null +++ b/clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fvisibility-inlines-hidden -fvisibility-inlines-hidden-static-local-var %s -emit-llvm -o - | FileCheck %s + +#define used __attribute__((used)) + +used inline void f1() { + // CHECK: @_ZZ2f1vE6f1_var = linkonce_odr hidden global i32 0 + static int f1_var = 0; +} + +__attribute__((visibility("default"))) +used inline void f2() { + // CHECK: @_ZZ2f2vE6f2_var = linkonce_odr global i32 0 + static int f2_var = 0; +} + +struct S { + used void f3() { + // CHECK: @_ZZN1S2f3EvE6f3_var = linkonce_odr hidden global i32 0 + static int f3_var = 0; + } + + void f6(); + void f7(); +}; + +used void f4() { + // CHECK: @_ZZ2f4vE6f4_var = internal global i32 0 + static int f4_var = 0; +} + +__attribute__((visibility("default"))) +used void f5() { + // CHECK: @_ZZ2f5vE6f5_var = internal global i32 0 + static int f5_var = 0; +} + +used void S::f6() { + // CHECK: @_ZZN1S2f6EvE6f6_var = internal global i32 0 + static int f6_var = 0; +} + +used inline void S::f7() { + // CHECK: @_ZZN1S2f7EvE6f7_var = linkonce_odr hidden global i32 0 + static int f7_var = 0; +} + + +struct __attribute__((visibility("default"))) S2 { + used void f8() { + // CHECK: @_ZZN2S22f8EvE6f8_var = linkonce_odr hidden global i32 0 + static int f8_var = 0; + } +}; diff --git a/clang/test/CodeGenObjC/attr-used-on-method.m b/clang/test/CodeGenObjC/attr-used-on-method.m new file mode 100644 index 0000000000000..d8b2a5d291841 --- /dev/null +++ b/clang/test/CodeGenObjC/attr-used-on-method.m @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple 
x86_64-apple-macosx10.10 %s -S -emit-llvm -o - | FileCheck %s + +// CHECK: @llvm.used = +// CHECK-SAME: @"\01-[X m]" + +// CHECK: define internal void @"\01-[X m]"( + +@interface X @end +@implementation X +-(void) m __attribute__((used)) {} +@end diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c index 595959b48828a..15e3e012f2ac1 100644 --- a/clang/test/Driver/cl-x86-flags.c +++ b/clang/test/Driver/cl-x86-flags.c @@ -128,5 +128,9 @@ // RUN: %clang_cl -m64 -arch:avx512 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx51264 %s // avx51264: argument unused during compilation +// RUN: %clang_cl -m64 -arch:AVX -tune:haswell --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=tune %s +// tune: "-target-cpu" "sandybridge" +// tune-SAME: "-tune-cpu" "haswell" + void f() { } diff --git a/clang/test/Driver/config-file3.c b/clang/test/Driver/config-file3.c index 148646c2ebbf1..fc5c286553ad5 100644 --- a/clang/test/Driver/config-file3.c +++ b/clang/test/Driver/config-file3.c @@ -1,14 +1,15 @@ // REQUIRES: shell // REQUIRES: x86-registered-target +// RUN: rm -rf %t && mkdir %t + //--- If config file is specified by relative path (workdir/cfg-s2), it is searched for by that path. 
+ +// RUN: mkdir -p %t/workdir/subdir +// RUN: echo "@subdir/cfg-s2" > %t/workdir/cfg-1 +// RUN: echo "-Wundefined-var-template" > %t/workdir/subdir/cfg-s2 // -// RUN: mkdir -p %T/workdir -// RUN: echo "@subdir/cfg-s2" > %T/workdir/cfg-1 -// RUN: mkdir -p %T/workdir/subdir -// RUN: echo "-Wundefined-var-template" > %T/workdir/subdir/cfg-s2 -// -// RUN: ( cd %T && %clang --config workdir/cfg-1 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-REL ) +// RUN: ( cd %t && %clang --config workdir/cfg-1 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-REL ) // // CHECK-REL: Configuration file: {{.*}}/workdir/cfg-1 // CHECK-REL: -Wundefined-var-template @@ -16,12 +17,11 @@ //--- Invocation qqq-clang-g++ tries to find config file qqq-clang-g++.cfg first. // -// RUN: rm -rf %T/testdmode -// RUN: mkdir -p %T/testdmode -// RUN: ln -s %clang %T/testdmode/qqq-clang-g++ -// RUN: echo "-Wundefined-func-template" > %T/testdmode/qqq-clang-g++.cfg -// RUN: echo "-Werror" > %T/testdmode/qqq.cfg -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix FULL-NAME +// RUN: mkdir %t/testdmode +// RUN: ln -s %clang %t/testdmode/qqq-clang-g++ +// RUN: echo "-Wundefined-func-template" > %t/testdmode/qqq-clang-g++.cfg +// RUN: echo "-Werror" > %t/testdmode/qqq.cfg +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix FULL-NAME // // FULL-NAME: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg // FULL-NAME: -Wundefined-func-template @@ -31,20 +31,20 @@ // (As the clang executable and symlink are in different directories, this // requires specifying the path via --config-*-dir= though.) 
// -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir=%T/testdmode -c %s -### 2>&1 | FileCheck %s -check-prefix SYMLINK +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir=%t/testdmode -c %s -### 2>&1 | FileCheck %s -check-prefix SYMLINK // // SYMLINK: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg // //--- File specified by --config overrides config inferred from clang executable. // -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT // // CHECK-EXPLICIT: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg // //--- Invocation qqq-clang-g++ tries to find config file qqq.cfg if qqq-clang-g++.cfg is not found. // -// RUN: rm %T/testdmode/qqq-clang-g++.cfg -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix SHORT-NAME +// RUN: rm %t/testdmode/qqq-clang-g++.cfg +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix SHORT-NAME // // SHORT-NAME: Configuration file: {{.*}}/testdmode/qqq.cfg // SHORT-NAME: -Werror @@ -53,11 +53,10 @@ //--- Config files are searched for in binary directory as well. 
// -// RUN: rm -rf %T/testbin -// RUN: mkdir -p %T/testbin -// RUN: ln -s %clang %T/testbin/clang -// RUN: echo "-Werror" > %T/testbin/aaa.cfg -// RUN: %T/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-BIN +// RUN: mkdir %t/testbin +// RUN: ln -s %clang %t/testbin/clang +// RUN: echo "-Werror" > %t/testbin/aaa.cfg +// RUN: %t/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-BIN // // CHECK-BIN: Configuration file: {{.*}}/testbin/aaa.cfg // CHECK-BIN: -Werror @@ -68,12 +67,11 @@ //--- When reloading config file, x86_64-clang-g++ tries to find config i386-clang-g++.cfg first. // -// RUN: rm -rf %T/testreload -// RUN: mkdir -p %T/testreload -// RUN: ln -s %clang %T/testreload/x86_64-clang-g++ -// RUN: echo "-Wundefined-func-template" > %T/testreload/i386-clang-g++.cfg -// RUN: echo "-Werror" > %T/testreload/i386.cfg -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD +// RUN: mkdir %t/testreload +// RUN: ln -s %clang %t/testreload/x86_64-clang-g++ +// RUN: echo "-Wundefined-func-template" > %t/testreload/i386-clang-g++.cfg +// RUN: echo "-Werror" > %t/testreload/i386.cfg +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD // // CHECK-RELOAD: Configuration file: {{.*}}/testreload/i386-clang-g++.cfg // CHECK-RELOAD: -Wundefined-func-template @@ -81,24 +79,24 @@ //--- If config file is specified by --config and its name does not start with architecture, it is used without reloading. 
// -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a // // CHECK-RELOAD1a: Configuration file: {{.*}}/Inputs/config-3.cfg // -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -target i386 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -target i386 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b // // CHECK-RELOAD1b: Configuration file: {{.*}}/Inputs/config-3.cfg //--- If config file is specified by --config and its name starts with architecture, it is reloaded. // -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c // // CHECK-RELOAD1c: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg //--- x86_64-clang-g++ tries to find config i386.cfg if i386-clang-g++.cfg is not found. 
// -// RUN: rm %T/testreload/i386-clang-g++.cfg -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d +// RUN: rm %t/testreload/i386-clang-g++.cfg +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d // // CHECK-RELOAD1d: Configuration file: {{.*}}/testreload/i386.cfg // CHECK-RELOAD1d: -Werror diff --git a/clang/test/Driver/darwin-objc-options.m b/clang/test/Driver/darwin-objc-options.m index 6684a5272175b..8721fbc1ef1e2 100644 --- a/clang/test/Driver/darwin-objc-options.m +++ b/clang/test/Driver/darwin-objc-options.m @@ -46,3 +46,12 @@ // RUN: %clang -target x86_64-linux-gnu -### %s 2>&1 | FileCheck --check-prefix=OTHER_COMPATIBILITY %s // DARWIN_COMPATIBILITY: -fcompatibility-qualified-id-block-type-checking // OTHER_COMPATIBILITY-NOT: -fcompatibility-qualified-id-block-type-checking + +// Add -fvisibility-inlines-hidden-static-local-var on Darwin. 
+// RUN: %clang -target x86_64-apple-darwin10 -### %s 2>&1 | FileCheck --check-prefix=DARWIN_INLINES_HIDDEN %s +// RUN: %clang -target x86_64-apple-darwin10 -fno-visibility-inlines-hidden-static-local-var -### %s 2>&1 | FileCheck --check-prefix=DARWIN_INLINES_HIDDEN_EXPLICIT_NO %s +// RUN: %clang -target x86_64-linux-gnu -### %s 2>&1 | FileCheck --check-prefix=NO_DARWIN_INLINES_HIDDEN %s +// DARWIN_INLINES_HIDDEN: -fvisibility-inlines-hidden-static-local-var +// DARWIN_INLINES_HIDDEN_EXPLICIT_NO-NOT: -fvisibility-inlines-hidden-static-local-var +// DARWIN_INLINES_HIDDEN_EXPLICIT_NO: -fno-visibility-inlines-hidden-static-local-var +// NO_DARWIN_INLINES_HIDDEN-NOT: -fvisibility-inlines-hidden-static-local-var diff --git a/clang/test/Driver/fmemprof.cpp b/clang/test/Driver/fmemprof.cpp index 049067803e2b4..a2b740e1e6e5e 100644 --- a/clang/test/Driver/fmemprof.cpp +++ b/clang/test/Driver/fmemprof.cpp @@ -1,6 +1,6 @@ -// RUN: %clangxx -target x86_64-linux-gnu -fmemprof %s -### 2>&1 | FileCheck %s -// RUN: %clangxx -target x86_64-linux-gnu -fmemprof -fno-memprof %s -### 2>&1 | FileCheck %s --check-prefix=OFF -// CHECK: "-cc1" {{.*}} "-fmemprof" +// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile %s -### 2>&1 | FileCheck %s +// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile -fno-memory-profile %s -### 2>&1 | FileCheck %s --check-prefix=OFF +// CHECK: "-cc1" {{.*}} "-fmemory-profile" // CHECK: ld{{.*}}libclang_rt.heapprof{{.*}}libclang_rt.heapprof_cxx -// OFF-NOT: "-fmemprof" +// OFF-NOT: "-fmemory-profile" // OFF-NOT: libclang_rt.heapprof diff --git a/clang/test/Driver/freebsd.c b/clang/test/Driver/freebsd.c index 769bb22da0dc7..1bf6dab802a1c 100644 --- a/clang/test/Driver/freebsd.c +++ b/clang/test/Driver/freebsd.c @@ -176,7 +176,7 @@ // RUN: %clang -mcpu=ultrasparc -target sparc64-unknown-freebsd8 %s -### -no-integrated-as 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SPARC-CPU %s // CHECK-SPARC-CPU: cc1{{.*}}" "-target-cpu" "ultrasparc" -// 
CHECK-SPARC-CPU: as{{.*}}" "-Av9 +// CHECK-SPARC-CPU: as{{.*}}" "-Av9a // Check that -G flags are passed to the linker for mips // RUN: %clang -target mips-unknown-freebsd %s -### -G0 2>&1 \ diff --git a/clang/test/Driver/gcc_forward.c b/clang/test/Driver/gcc_forward.c index a99944f8f5336..e6b0670d1a027 100644 --- a/clang/test/Driver/gcc_forward.c +++ b/clang/test/Driver/gcc_forward.c @@ -1,7 +1,8 @@ // RUN: %clang -### %s -target aarch64-none-elf \ -// RUN: --coverage -e _start -fuse-ld=lld --ld-path=ld -nostdlib -r -rdynamic -static -static-pie \ +// RUN: --coverage -e _start -fuse-ld=lld --ld-path=ld -nostartfiles \ +// RUN: -nostdlib -r -rdynamic -specs=nosys.specs -static -static-pie \ // RUN: 2>&1 | FileCheck --check-prefix=FORWARD %s -// FORWARD: gcc{{[^"]*}}" "--coverage" "-fuse-ld=lld" "--ld-path=ld" "-nostdlib" "-rdynamic" "-static" "-static-pie" "-o" "a.out" "{{.*}}.o" "-e" "_start" "-r" +// FORWARD: gcc{{[^"]*}}" "--coverage" "-fuse-ld=lld" "--ld-path=ld" "-nostartfiles" "-nostdlib" "-rdynamic" "-specs=nosys.specs" "-static" "-static-pie" "-o" "a.out" "{{.*}}.o" "-e" "_start" "-r" // Check that we don't try to forward -Xclang or -mlinker-version to GCC. // PR12920 -- Check also we may not forward W_Group options to GCC. 
diff --git a/clang/test/Driver/linux-as.c b/clang/test/Driver/linux-as.c index 77ac05f30942c..0959bd7ba0a11 100644 --- a/clang/test/Driver/linux-as.c +++ b/clang/test/Driver/linux-as.c @@ -168,7 +168,7 @@ // RUN: | FileCheck -check-prefix=CHECK-SPARCV9 %s // CHECK-SPARCV9: as // CHECK-SPARCV9: -64 -// CHECK-SPARCV9: -Av9 +// CHECK-SPARCV9: -Av9a // CHECK-SPARCV9-NOT: -KPIC // CHECK-SPARCV9: -o // @@ -177,7 +177,7 @@ // RUN: | FileCheck -check-prefix=CHECK-SPARCV9PIC %s // CHECK-SPARCV9PIC: as // CHECK-SPARCV9PIC: -64 -// CHECK-SPARCV9PIC: -Av9 +// CHECK-SPARCV9PIC: -Av9a // CHECK-SPARCV9PIC: -KPIC // CHECK-SPARCV9PIC: -o // diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c index 203b4b4a2ff0f..ae1aa64416907 100644 --- a/clang/test/Driver/openbsd.c +++ b/clang/test/Driver/openbsd.c @@ -70,7 +70,7 @@ // RUN: | FileCheck -check-prefix=CHECK-MIPS64EL-PIC %s // CHECK-AMD64-M32: as{{.*}}" "--32" // CHECK-POWERPC: as{{.*}}" "-mppc" "-many" -// CHECK-SPARC64: as{{.*}}" "-64" "-Av9" +// CHECK-SPARC64: as{{.*}}" "-64" "-Av9a" // CHECK-MIPS64: as{{.*}}" "-mabi" "64" "-EB" // CHECK-MIPS64-PIC: as{{.*}}" "-mabi" "64" "-EB" "-KPIC" // CHECK-MIPS64EL: as{{.*}}" "-mabi" "64" "-EL" diff --git a/clang/test/Driver/target-override.c b/clang/test/Driver/target-override.c index b4dbd2da1df6f..ddda8aaad85a0 100644 --- a/clang/test/Driver/target-override.c +++ b/clang/test/Driver/target-override.c @@ -1,16 +1,15 @@ // REQUIRES: shell // REQUIRES: x86-registered-target -// RUN: rm -rf %T/testbin -// RUN: mkdir -p %T/testbin -// RUN: ln -s %clang %T/testbin/i386-clang +// RUN: rm -rf %t && mkdir %t +// RUN: ln -s %clang %t/i386-clang // Check if invocation of "foo-clang" adds option "-target foo". 
// -// RUN: %T/testbin/i386-clang -c -no-canonical-prefixes %s -### 2>&1 | FileCheck -check-prefix CHECK-TG1 %s +// RUN: %t/i386-clang -c -no-canonical-prefixes %s -### 2>&1 | FileCheck -check-prefix CHECK-TG1 %s // CHECK-TG1: Target: i386 // Check if invocation of "foo-clang -target bar" overrides option "-target foo". // -// RUN: %T/testbin/i386-clang -c -no-canonical-prefixes -target x86_64 %s -### 2>&1 | FileCheck -check-prefix CHECK-TG2 %s +// RUN: %t/i386-clang -c -no-canonical-prefixes -target x86_64 %s -### 2>&1 | FileCheck -check-prefix CHECK-TG2 %s // CHECK-TG2: Target: x86_64 diff --git a/clang/test/Format/dump-config-list-override.cpp b/clang/test/Format/dump-config-list-override.cpp new file mode 100644 index 0000000000000..df4c6ad1333ef --- /dev/null +++ b/clang/test/Format/dump-config-list-override.cpp @@ -0,0 +1,24 @@ +/// Check that the ForEachMacros, etc. config entries replace default values instead of appending +/// FIXME: clang-format currently start overriding at index 0 (keeping the remaining +/// values) instead of either appending or completely replacing the values. +/// This behaviour is highly confusing. For now this test documents the current state. 
+// RUN: clang-format -style="{BasedOnStyle: LLVM}" -dump-config %s | \ +// RUN: FileCheck %s --check-prefixes=CHECK,DEFAULT +// RUN: clang-format -style="{BasedOnStyle: LLVM, ForEachMacros: ['OVERRIDE_FOREACH']}" -dump-config %s | \ +// RUN: FileCheck %s --check-prefixes=CHECK,OVERRIDE,FIXME-SHOULD-NOT-BE +// RUN: clang-format -style="{BasedOnStyle: LLVM, ForEachMacros: ['M1', 'M2', 'M3', 'M4']}" -dump-config %s | \ +// RUN: FileCheck %s --check-prefixes=CHECK,MORE-ENTRIES-THAN-DEFAULT + + +// CHECK-LABEL: ForEachMacros: +// DEFAULT-NEXT: {{^ }}- foreach +// DEFAULT-NEXT: {{^ }}- Q_FOREACH +// DEFAULT-NEXT: {{^ }}- BOOST_FOREACH +// OVERRIDE-NEXT: {{^ }}- OVERRIDE_FOREACH +// FIXME-SHOULD-NOT-BE-NEXT: {{^ }}- Q_FOREACH +// FIXME-SHOULD-NOT-BE-NEXT: {{^ }}- BOOST_FOREACH +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M1 +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M2 +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M3 +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M4 +// CHECK-NEXT: {{^[F-Z]}} diff --git a/clang/test/PCH/cxx1z-decomposition.cpp b/clang/test/PCH/cxx1z-decomposition.cpp index 2f817b4280ded..914ce80c550d1 100644 --- a/clang/test/PCH/cxx1z-decomposition.cpp +++ b/clang/test/PCH/cxx1z-decomposition.cpp @@ -2,11 +2,11 @@ // RUN: %clang_cc1 -pedantic -std=c++1z -include %s -verify %s // // With PCH: -// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch %s -o %t -// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -verify %s +// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fallow-pch-with-compiler-errors %s -o %t +// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -fallow-pch-with-compiler-errors -verify %s -// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fpch-instantiate-templates %s -o %t -// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -verify %s +// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fallow-pch-with-compiler-errors -fpch-instantiate-templates %s -o %t +// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t 
-fallow-pch-with-compiler-errors -verify %s #ifndef HEADER #define HEADER @@ -22,6 +22,8 @@ constexpr int foo(Q &&q) { return a * 10 + b; } +auto [noinit]; // expected-error{{decomposition declaration '[noinit]' requires an initializer}} + #else int arr[2]; diff --git a/clang/test/Parser/pragma-fenv_round.c b/clang/test/Parser/pragma-fenv_round.c new file mode 100644 index 0000000000000..56abf7bf75a40 --- /dev/null +++ b/clang/test/Parser/pragma-fenv_round.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -fsyntax-only -Wignored-pragmas -verify %s + +#pragma STDC FENV_ROUND ON // expected-warning {{invalid or unsupported rounding mode}} + +float func_01(int x, float y) { + if (x) + return y + 2; + #pragma STDC FENV_ROUND FE_DOWNWARD // expected-error{{'#pragma STDC FENV_ROUND' can only appear at file scope or at the start of a compound statement}} + // expected-warning@-1{{pragma STDC FENV_ROUND is not supported}} + return x + y; +} diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 905a77785a9d8..cb137eea072e6 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -44,12 +44,12 @@ // CHECK-NOT: __ARM_BF16_FORMAT_ALTERNATIVE 1 // CHECK-NOT: __ARM_FEATURE_BF16 1 // CHECK-NOT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 0 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 128 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 256 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 512 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 1024 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 2048 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 0 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 128 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 256 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 512 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 1024 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 2048 // RUN: %clang -target aarch64_be-eabi -x c -E -dM %s -o - 
| FileCheck %s -check-prefix CHECK-BIGENDIAN // CHECK-BIGENDIAN: __ARM_BIG_ENDIAN 1 @@ -444,10 +444,8 @@ // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=1024 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-1024 %s // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s -// NOTE: The __ARM_FEATURE_SVE_BITS feature macro is experimental until the -// feature is complete. -// CHECK-SVE-VECTOR-BITS-128: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 128 -// CHECK-SVE-VECTOR-BITS-256: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 256 -// CHECK-SVE-VECTOR-BITS-512: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 512 -// CHECK-SVE-VECTOR-BITS-1024: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 1024 -// CHECK-SVE-VECTOR-BITS-2048: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 2048 +// CHECK-SVE-VECTOR-BITS-128: __ARM_FEATURE_SVE_BITS 128 +// CHECK-SVE-VECTOR-BITS-256: __ARM_FEATURE_SVE_BITS 256 +// CHECK-SVE-VECTOR-BITS-512: __ARM_FEATURE_SVE_BITS 512 +// CHECK-SVE-VECTOR-BITS-1024: __ARM_FEATURE_SVE_BITS 1024 +// CHECK-SVE-VECTOR-BITS-2048: __ARM_FEATURE_SVE_BITS 2048 diff --git a/clang/test/Sema/attr-arm-sve-vector-bits.c b/clang/test/Sema/attr-arm-sve-vector-bits.c index f143037fd6114..1bcbfa360c976 100644 --- a/clang/test/Sema/attr-arm-sve-vector-bits.c +++ b/clang/test/Sema/attr-arm-sve-vector-bits.c @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=1024 -fallow-half-arguments-and-returns %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=2048 -fallow-half-arguments-and-returns %s -#define N 
__ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef __SVInt8_t svint8_t; typedef __SVInt16_t svint16_t; diff --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp index c8ce257ad3265..ea7c4778db0ea 100644 --- a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp +++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns %s // expected-no-diagnostics -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef __SVInt8_t svint8_t; typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp index 06771f8f3252a..5b5d1cb7bc807 100644 --- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp +++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp @@ -23,6 +23,10 @@ static_assert(sizeof(long long) == 8); template constexpr To bit_cast(const From &from) { static_assert(sizeof(To) == sizeof(From)); + // expected-note@+9 {{cannot be represented in type 'bool'}} +#ifdef __x86_64 + // expected-note@+7 {{or 'std::byte'; '__int128' is invalid}} +#endif #ifdef __CHAR_UNSIGNED__ // expected-note@+4 2 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'signed char' is invalid}} #else @@ -397,3 +401,65 @@ union IdentityInUnion { }; constexpr IdentityInUnion identity3a = {42}; constexpr unsigned char identity3b = __builtin_bit_cast(unsigned char, identity3a.n); + +namespace test_bool { + +constexpr bool test_bad_bool = bit_cast('A'); // expected-error {{must be initialized by a constant expression}} expected-note{{in call}} + +static_assert(round_trip(true), ""); +static_assert(round_trip(false), ""); 
+static_assert(round_trip(false), ""); + +static_assert(round_trip((char)0), ""); +static_assert(round_trip((char)1), ""); +} + +namespace test_long_double { +#ifdef __x86_64 +constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // expected-error{{must be initialized by a constant expression}} expected-note{{in call}} + +constexpr long double ld = 3.1425926539; + +struct bytes { + unsigned char d[16]; +}; + +static_assert(round_trip(ld), ""); + +static_assert(round_trip(10.0L)); + +constexpr bool f(bool read_uninit) { + bytes b = bit_cast(ld); + unsigned char ld_bytes[10] = { + 0x0, 0x48, 0x9f, 0x49, 0xf0, + 0x3c, 0x20, 0xc9, 0x0, 0x40, + }; + + for (int i = 0; i != 10; ++i) + if (ld_bytes[i] != b.d[i]) + return false; + + if (read_uninit && b.d[10]) // expected-note{{read of uninitialized object is not allowed in a constant expression}} + return false; + + return true; +} + +static_assert(f(/*read_uninit=*/false), ""); +static_assert(f(/*read_uninit=*/true), ""); // expected-error{{static_assert expression is not an integral constant expression}} expected-note{{in call to 'f(true)'}} + +constexpr bytes ld539 = { + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xc0, 0x86, + 0x8, 0x40, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +constexpr long double fivehundredandthirtynine = 539.0; + +static_assert(bit_cast(ld539) == fivehundredandthirtynine, ""); + +#else +static_assert(round_trip<__int128_t>(34.0L)); +#endif +} diff --git a/clang/test/SemaCXX/thread-safety-annotations.h b/clang/test/SemaCXX/thread-safety-annotations.h index 7755a1b328e7e..d89bcf8ff4706 100644 --- a/clang/test/SemaCXX/thread-safety-annotations.h +++ b/clang/test/SemaCXX/thread-safety-annotations.h @@ -6,6 +6,7 @@ #define ASSERT_SHARED_LOCK(...) __attribute__((assert_shared_capability(__VA_ARGS__))) #define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((acquire_capability(__VA_ARGS__))) #define SHARED_LOCK_FUNCTION(...) 
__attribute__((acquire_shared_capability(__VA_ARGS__))) +#define UNLOCK_FUNCTION(...) __attribute__((release_generic_capability(__VA_ARGS__))) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) __attribute__((try_acquire_capability(__VA_ARGS__))) #define SHARED_TRYLOCK_FUNCTION(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((requires_capability(__VA_ARGS__))) @@ -16,6 +17,7 @@ #define ASSERT_SHARED_LOCK(...) __attribute__((assert_shared_lock(__VA_ARGS__))) #define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) #define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) +#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__))) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) __attribute__((exclusive_trylock_function(__VA_ARGS__))) #define SHARED_TRYLOCK_FUNCTION(...) __attribute__((shared_trylock_function(__VA_ARGS__))) #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) @@ -23,7 +25,6 @@ #endif // Lock semantics only -#define UNLOCK_FUNCTION(...) 
__attribute__((unlock_function(__VA_ARGS__))) #define GUARDED_VAR __attribute__((guarded_var)) #define PT_GUARDED_VAR __attribute__((pt_guarded_var)) diff --git a/clang/test/SemaCXX/unreachable-code.cpp b/clang/test/SemaCXX/unreachable-code.cpp index fd006c099e7dc..0dfc3d5744fb3 100644 --- a/clang/test/SemaCXX/unreachable-code.cpp +++ b/clang/test/SemaCXX/unreachable-code.cpp @@ -68,3 +68,12 @@ int pr6130(unsigned i) { throw PR6130(); // no-warning } } + +extern "C" void foo(void); +extern "C" __attribute__((weak)) decltype(foo) foo; + +void weak_redecl() { + if (foo) + return; + bar(); // no-warning +} diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index 91bd15def577d..d1520b1decbd3 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -5036,7 +5036,8 @@ void spawn_fake_flight_control_thread(void) { } extern const char *deque_log_msg(void) __attribute__((requires_capability(Logger))); -void logger_entry(void) __attribute__((requires_capability(Logger))) { +void logger_entry(void) __attribute__((requires_capability(Logger))) + __attribute__((requires_capability(!FlightControl))) { const char *msg; while ((msg = deque_log_msg())) { @@ -5044,13 +5045,13 @@ void logger_entry(void) __attribute__((requires_capability(Logger))) { } } -void spawn_fake_logger_thread(void) { +void spawn_fake_logger_thread(void) __attribute__((requires_capability(!FlightControl))) { acquire(Logger); logger_entry(); release(Logger); } -int main(void) { +int main(void) __attribute__((requires_capability(!FlightControl))) { spawn_fake_flight_control_thread(); spawn_fake_logger_thread(); diff --git a/clang/test/SemaCXX/warn-thread-safety-negative.cpp b/clang/test/SemaCXX/warn-thread-safety-negative.cpp index 456fe16e6574e..68e30f4a3225b 100644 --- a/clang/test/SemaCXX/warn-thread-safety-negative.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-negative.cpp @@ 
-81,6 +81,35 @@ class Foo { } // end namespace SimpleTest +Mutex globalMutex; + +namespace ScopeTest { + +void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); +void fq() EXCLUSIVE_LOCKS_REQUIRED(!::globalMutex); + +namespace ns { + Mutex globalMutex; + void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); + void fq() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex); +} + +void testGlobals() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex) { + f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} + fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} + ns::f(); + ns::fq(); +} + +void testNamespaceGlobals() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex) { + f(); + fq(); + ns::f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} + ns::fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} +} + +} // end namespace ScopeTest + namespace DoubleAttribute { struct Foo { diff --git a/clang/test/SemaObjC/arc-repeated-weak.mm b/clang/test/SemaObjC/arc-repeated-weak.mm index 4eec4d2fe69c7..90388598c7b8d 100644 --- a/clang/test/SemaObjC/arc-repeated-weak.mm +++ b/clang/test/SemaObjC/arc-repeated-weak.mm @@ -485,3 +485,17 @@ void foo1() { @class NSString; static NSString* const kGlobal = @""; + +@interface NSDictionary +- (id)objectForKeyedSubscript:(id)key; +@end + +@interface WeakProp +@property (weak) NSDictionary *nd; +@end + +@implementation WeakProp +-(void)m { + (void)self.nd[@""]; // no warning +} +@end diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index a6b9df3080488..6a715b27518ae 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -164,6 +164,12 @@ CXSourceRange cxloc::translateSourceRange(const SourceManager &SM, return Result; } +CharSourceRange cxloc::translateCXRangeToCharRange(CXSourceRange R) { + return CharSourceRange::getCharRange( + 
SourceLocation::getFromRawEncoding(R.begin_int_data), + SourceLocation::getFromRawEncoding(R.end_int_data)); +} + //===----------------------------------------------------------------------===// // Cursor visitor. //===----------------------------------------------------------------------===// @@ -8850,6 +8856,42 @@ void clang::PrintLibclangResourceUsage(CXTranslationUnit TU) { clang_disposeCXTUResourceUsage(Usage); } +CXCursor clang_Cursor_getVarDeclInitializer(CXCursor cursor) { + const Decl *const D = getCursorDecl(cursor); + if (!D) + return clang_getNullCursor(); + const auto *const VD = dyn_cast(D); + if (!VD) + return clang_getNullCursor(); + const Expr *const Init = VD->getInit(); + if (!Init) + return clang_getNullCursor(); + + return cxcursor::MakeCXCursor(Init, VD, cxcursor::getCursorTU(cursor)); +} + +int clang_Cursor_hasVarDeclGlobalStorage(CXCursor cursor) { + const Decl *const D = getCursorDecl(cursor); + if (!D) + return -1; + const auto *const VD = dyn_cast(D); + if (!VD) + return -1; + + return VD->hasGlobalStorage(); +} + +int clang_Cursor_hasVarDeclExternalStorage(CXCursor cursor) { + const Decl *const D = getCursorDecl(cursor); + if (!D) + return -1; + const auto *const VD = dyn_cast(D); + if (!VD) + return -1; + + return VD->hasExternalStorage(); +} + //===----------------------------------------------------------------------===// // Misc. utility functions. 
//===----------------------------------------------------------------------===// diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index a4077140acee8..c3b9ab6ffb9b0 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -20,6 +20,7 @@ set(SOURCES CXType.cpp Indexing.cpp FatalErrorHandler.cpp + Rewrite.cpp ADDITIONAL_HEADERS CIndexDiagnostic.h @@ -41,6 +42,7 @@ set(LIBS clangFrontend clangIndex clangLex + clangRewrite clangSema clangSerialization clangTooling diff --git a/clang/tools/libclang/CXSourceLocation.h b/clang/tools/libclang/CXSourceLocation.h index 6702d0cf9791b..ce3d09e1c9eb8 100644 --- a/clang/tools/libclang/CXSourceLocation.h +++ b/clang/tools/libclang/CXSourceLocation.h @@ -71,7 +71,11 @@ static inline SourceRange translateCXSourceRange(CXSourceRange R) { SourceLocation::getFromRawEncoding(R.end_int_data)); } - +/// Translates CXSourceRange to CharSourceRange. +/// The semantics of \p R are: +/// R.begin_int_data is first character of the range. +/// R.end_int_data is one character past the end of the range. +CharSourceRange translateCXRangeToCharRange(CXSourceRange R); }} // end namespace: clang::cxloc #endif diff --git a/clang/tools/libclang/Rewrite.cpp b/clang/tools/libclang/Rewrite.cpp new file mode 100644 index 0000000000000..389232d97acc1 --- /dev/null +++ b/clang/tools/libclang/Rewrite.cpp @@ -0,0 +1,63 @@ +//===- Rewrite.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-c/Rewrite.h" +#include "CXSourceLocation.h" +#include "CXTranslationUnit.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/ASTUnit.h" +#include "clang/Rewrite/Core/Rewriter.h" + +CXRewriter clang_CXRewriter_create(CXTranslationUnit TU) { + if (clang::cxtu::isNotUsableTU(TU)) { + LOG_BAD_TU(TU); + return {}; + } + clang::ASTUnit *AU = clang::cxtu::getASTUnit(TU); + assert(AU); + return reinterpret_cast( + new clang::Rewriter(AU->getSourceManager(), AU->getLangOpts())); +} + +void clang_CXRewriter_insertTextBefore(CXRewriter Rew, CXSourceLocation Loc, + const char *Insert) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.InsertTextBefore(clang::cxloc::translateSourceLocation(Loc), Insert); +} + +void clang_CXRewriter_replaceText(CXRewriter Rew, CXSourceRange ToBeReplaced, + const char *Replacement) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.ReplaceText(clang::cxloc::translateCXRangeToCharRange(ToBeReplaced), + Replacement); +} + +void clang_CXRewriter_removeText(CXRewriter Rew, CXSourceRange ToBeRemoved) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.RemoveText(clang::cxloc::translateCXRangeToCharRange(ToBeRemoved)); +} + +int clang_CXRewriter_overwriteChangedFiles(CXRewriter Rew) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + return R.overwriteChangedFiles(); +} + +void clang_CXRewriter_writeMainFileToStdOut(CXRewriter Rew) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.getEditBuffer(R.getSourceMgr().getMainFileID()).write(llvm::outs()); +} + +void clang_CXRewriter_dispose(CXRewriter Rew) { + if (Rew) + delete reinterpret_cast(Rew); +} diff --git a/clang/tools/libclang/libclang.exports b/clang/tools/libclang/libclang.exports index defbaa91a488c..528424713a9a5 100644 --- 
a/clang/tools/libclang/libclang.exports +++ b/clang/tools/libclang/libclang.exports @@ -382,3 +382,13 @@ clang_PrintingPolicy_setProperty clang_PrintingPolicy_dispose clang_install_aborting_llvm_fatal_error_handler clang_uninstall_llvm_fatal_error_handler +clang_Cursor_getVarDeclInitializer +clang_Cursor_hasVarDeclGlobalStorage +clang_Cursor_hasVarDeclExternalStorage +clang_CXRewriter_create +clang_CXRewriter_insertTextBefore +clang_CXRewriter_replaceText +clang_CXRewriter_removeText +clang_CXRewriter_overwriteChangedFiles +clang_CXRewriter_writeMainFileToStdOut +clang_CXRewriter_dispose diff --git a/clang/tools/scan-build-py/bin/analyze-build b/clang/tools/scan-build-py/bin/analyze-build index 6c285874a2083..0884ef2234bf4 100755 --- a/clang/tools/scan-build-py/bin/analyze-build +++ b/clang/tools/scan-build-py/bin/analyze-build @@ -5,12 +5,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import multiprocessing -multiprocessing.freeze_support() - import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.dirname(this_dir)) from libscanbuild.analyze import analyze_build -sys.exit(analyze_build()) + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(analyze_build()) diff --git a/clang/tools/scan-build-py/bin/intercept-build b/clang/tools/scan-build-py/bin/intercept-build index 23f5104782ca7..d9757b77b5c73 100755 --- a/clang/tools/scan-build-py/bin/intercept-build +++ b/clang/tools/scan-build-py/bin/intercept-build @@ -5,12 +5,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import multiprocessing -multiprocessing.freeze_support() - import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.dirname(this_dir)) from libscanbuild.intercept import intercept_build -sys.exit(intercept_build()) + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(intercept_build()) diff --git 
a/clang/tools/scan-build-py/bin/scan-build b/clang/tools/scan-build-py/bin/scan-build index 156da064a2b47..be4e51887e30b 100755 --- a/clang/tools/scan-build-py/bin/scan-build +++ b/clang/tools/scan-build-py/bin/scan-build @@ -5,12 +5,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import multiprocessing -multiprocessing.freeze_support() - import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.dirname(this_dir)) from libscanbuild.analyze import scan_build -sys.exit(scan_build()) + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(scan_build()) diff --git a/clang/tools/scan-view/share/ScanView.py b/clang/tools/scan-view/share/ScanView.py index a6cc7692ffe00..5a5d15e85b30c 100644 --- a/clang/tools/scan-view/share/ScanView.py +++ b/clang/tools/scan-view/share/ScanView.py @@ -744,7 +744,7 @@ def send_file(self, f, ctype): return f def send_string(self, s, ctype='text/html', headers=True, mtime=None): - encoded_s = s.encode() + encoded_s = s.encode('utf-8') if headers: self.send_response(200) self.send_header("Content-type", ctype) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index f2978cdbed8d6..b198efa4af9ec 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -20,6 +20,7 @@ using clang::tooling::ReplacementTest; using clang::tooling::toReplacements; +using testing::internal::ScopedTrace; namespace clang { namespace format { @@ -65,8 +66,10 @@ class FormatTest : public ::testing::Test { return getStyleWithColumns(getGoogleStyle(), ColumnLimit); } - void verifyFormat(llvm::StringRef Expected, llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { + void _verifyFormat(const char *File, int Line, llvm::StringRef Expected, + llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + ScopedTrace t(File, Line, ::testing::Message() << Code.str()); EXPECT_EQ(Expected.str(), 
format(Expected, Style)) << "Expected code is not stable"; EXPECT_EQ(Expected.str(), format(Code, Style)); @@ -79,24 +82,24 @@ class FormatTest : public ::testing::Test { } } - void verifyFormat(llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { - verifyFormat(Code, test::messUp(Code), Style); + void _verifyFormat(const char *File, int Line, llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + _verifyFormat(File, Line, Code, test::messUp(Code), Style); } - void verifyIncompleteFormat(llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { + void _verifyIncompleteFormat(const char *File, int Line, llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + ScopedTrace t(File, Line, ::testing::Message() << Code.str()); EXPECT_EQ(Code.str(), format(test::messUp(Code), Style, SC_ExpectIncomplete)); } - void verifyGoogleFormat(llvm::StringRef Code) { - verifyFormat(Code, getGoogleStyle()); - } - - void verifyIndependentOfContext(llvm::StringRef text) { - verifyFormat(text); - verifyFormat(llvm::Twine("void f() { " + text + " }").str()); + void _verifyIndependentOfContext(const char *File, int Line, + llvm::StringRef Text, + const FormatStyle &Style = getLLVMStyle()) { + _verifyFormat(File, Line, Text, Style); + _verifyFormat(File, Line, llvm::Twine("void f() { " + Text + " }").str(), + Style); } /// \brief Verify that clang-format does not crash on the given input. @@ -108,6 +111,13 @@ class FormatTest : public ::testing::Test { int ReplacementCount; }; +#define verifyIndependentOfContext(...) \ + _verifyIndependentOfContext(__FILE__, __LINE__, __VA_ARGS__) +#define verifyIncompleteFormat(...) \ + _verifyIncompleteFormat(__FILE__, __LINE__, __VA_ARGS__) +#define verifyFormat(...) 
_verifyFormat(__FILE__, __LINE__, __VA_ARGS__) +#define verifyGoogleFormat(Code) verifyFormat(Code, getGoogleStyle()) + TEST_F(FormatTest, MessUp) { EXPECT_EQ("1 2 3", test::messUp("1 2 3")); EXPECT_EQ("1 2 3\n", test::messUp("1\n2\n3\n")); @@ -158,6 +168,9 @@ TEST_F(FormatTest, NestedNameSpecifiers) { verifyFormat("vector<::Type> v;"); verifyFormat("::ns::SomeFunction(::ns::SomeOtherFunction())"); verifyFormat("static constexpr bool Bar = decltype(bar())::value;"); + verifyFormat("static constexpr bool Bar = typeof(bar())::value;"); + verifyFormat("static constexpr bool Bar = __underlying_type(bar())::value;"); + verifyFormat("static constexpr bool Bar = _Atomic(bar())::value;"); verifyFormat("bool a = 2 < ::SomeFunction();"); verifyFormat("ALWAYS_INLINE ::std::string getName();"); verifyFormat("some::string getName();"); @@ -6668,9 +6681,12 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { Style); // All declarations and definitions should have the return type moved to its - // own - // line. + // own line. 
Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; + Style.TypenameMacros = {"LIST"}; + verifyFormat("SomeType\n" + "funcdecl(LIST(uint64_t));", + Style); verifyFormat("class E {\n" " int\n" " f() {\n" @@ -7894,7 +7910,11 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("auto PointerBinding = [](const char *S) {};"); verifyFormat("typedef typeof(int(int, int)) *MyFunc;"); verifyFormat("[](const decltype(*a) &value) {}"); + verifyFormat("[](const typeof(*a) &value) {}"); + verifyFormat("[](const _Atomic(a *) &value) {}"); + verifyFormat("[](const __underlying_type(a) &value) {}"); verifyFormat("decltype(a * b) F();"); + verifyFormat("typeof(a * b) F();"); verifyFormat("#define MACRO() [](A *a) { return 1; }"); verifyFormat("Constructor() : member([](A *a, B *b) {}) {}"); verifyIndependentOfContext("typedef void (*f)(int *a);"); @@ -7960,6 +7980,9 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("delete *x;", Left); verifyFormat("typedef typeof(int(int, int))* MyFuncPtr;", Left); verifyFormat("[](const decltype(*a)* ptr) {}", Left); + verifyFormat("[](const typeof(*a)* ptr) {}", Left); + verifyFormat("[](const _Atomic(a*)* ptr) {}", Left); + verifyFormat("[](const __underlying_type(a)* ptr) {}", Left); verifyFormat("typedef typeof /*comment*/ (int(int, int))* MyFuncPtr;", Left); verifyFormat("auto x(A&&, B&&, C&&) -> D;", Left); verifyFormat("auto x = [](A&&, B&&, C&&) -> D {};", Left); @@ -8028,11 +8051,37 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + FormatStyle TypeMacros = getLLVMStyle(); + TypeMacros.TypenameMacros = {"LIST"}; + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); // multiplication + 
+ FormatStyle CustomQualifier = getLLVMStyle(); + // Add indentifers that should not be parsed as a qualifier by default. + CustomQualifier.AttributeMacros.push_back("__my_qualifier"); + CustomQualifier.AttributeMacros.push_back("_My_qualifier"); + CustomQualifier.AttributeMacros.push_back("my_other_qualifier"); + verifyFormat("vector parse_as_multiply;"); + verifyFormat("vector v;", CustomQualifier); + verifyFormat("vector parse_as_multiply;"); + verifyFormat("vector v;", CustomQualifier); + verifyFormat("vector parse_as_multiply;"); + verifyFormat("vector v;", CustomQualifier); verifyFormat("vector v;"); + verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("foo();"); verifyFormat("foo();"); verifyFormat("decltype(*::std::declval()) void F();"); + verifyFormat("typeof(*::std::declval()) void F();"); + verifyFormat("_Atomic(*::std::declval()) void F();"); + verifyFormat("__underlying_type(*::std::declval()) void F();"); verifyFormat( "template ::value &&\n" @@ -8056,6 +8105,10 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("MACRO(int *i);"); verifyIndependentOfContext("MACRO(auto *a);"); verifyIndependentOfContext("MACRO(const A *a);"); + verifyIndependentOfContext("MACRO(_Atomic(A) *a);"); + verifyIndependentOfContext("MACRO(decltype(A) *a);"); + verifyIndependentOfContext("MACRO(typeof(A) *a);"); + verifyIndependentOfContext("MACRO(__underlying_type(A) *a);"); verifyIndependentOfContext("MACRO(A *const a);"); verifyIndependentOfContext("MACRO(A *restrict a);"); verifyIndependentOfContext("MACRO(A *__restrict__ a);"); @@ -8068,10 +8121,30 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("MACRO(A *_Null_unspecified a);"); verifyIndependentOfContext("MACRO(A *__attribute__((foo)) a);"); verifyIndependentOfContext("MACRO(A *__attribute((foo)) a);"); + verifyIndependentOfContext("MACRO(A *[[clang::attr]] a);"); + verifyIndependentOfContext("MACRO(A *[[clang::attr(\"foo\")]] a);"); + 
verifyIndependentOfContext("MACRO(A *__ptr32 a);"); + verifyIndependentOfContext("MACRO(A *__ptr64 a);"); + verifyIndependentOfContext("MACRO(A *__capability);"); + verifyIndependentOfContext("MACRO(A &__capability);"); + verifyFormat("MACRO(A *__my_qualifier);"); // type declaration + verifyFormat("void f() { MACRO(A * __my_qualifier); }"); // multiplication + // If we add __my_qualifier to AttributeMacros it should always be parsed as + // a type declaration: + verifyFormat("MACRO(A *__my_qualifier);", CustomQualifier); + verifyFormat("void f() { MACRO(A *__my_qualifier); }", CustomQualifier); + // Also check that TypenameMacros prevents parsing it as multiplication: + verifyIndependentOfContext("MACRO(LIST(uint64_t) * a);"); // multiplication + verifyIndependentOfContext("MACRO(LIST(uint64_t) *a);", TypeMacros); // type + verifyIndependentOfContext("MACRO('0' <= c && c <= '9');"); verifyFormat("void f() { f(float{1}, a * a); }"); // FIXME: Is there a way to make this work? // verifyIndependentOfContext("MACRO(A *a);"); + verifyFormat("MACRO(A &B);"); + verifyFormat("MACRO(A *B);"); + verifyFormat("void f() { MACRO(A * B); }"); + verifyFormat("void f() { MACRO(A & B); }"); verifyFormat("DatumHandle const *operator->() const { return input_; }"); verifyFormat("return options != nullptr && operator==(*options);"); @@ -8121,10 +8194,47 @@ TEST_F(FormatTest, UnderstandsAttributes) { verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa __attribute__((unused))\n" "aaaaaaaaaaaaaaaaaaaaaaa(int i);"); FormatStyle AfterType = getLLVMStyle(); - AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; + AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; verifyFormat("__attribute__((nodebug)) void\n" "foo() {}\n", AfterType); + verifyFormat("__unused void\n" + "foo() {}", + AfterType); + + FormatStyle CustomAttrs = getLLVMStyle(); + CustomAttrs.AttributeMacros.push_back("__unused"); + CustomAttrs.AttributeMacros.push_back("__attr1"); + 
CustomAttrs.AttributeMacros.push_back("__attr2"); + CustomAttrs.AttributeMacros.push_back("no_underscore_attr"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + // Check that it is parsed as a multiplication without AttributeMacros and + // as a pointer qualifier when we add __attr1/__attr2 to AttributeMacros. + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + + // Check that these are not parsed as function declarations: + CustomAttrs.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + CustomAttrs.BreakBeforeBraces = FormatStyle::BS_Allman; + verifyFormat("SomeType s(InitValue);", CustomAttrs); + verifyFormat("SomeType s{InitValue};", CustomAttrs); + verifyFormat("SomeType *__unused s(InitValue);", CustomAttrs); + verifyFormat("SomeType *__unused s{InitValue};", CustomAttrs); + verifyFormat("SomeType s __unused(InitValue);", CustomAttrs); + verifyFormat("SomeType s __unused{InitValue};", CustomAttrs); + verifyFormat("SomeType *__capability s(InitValue);", CustomAttrs); + verifyFormat("SomeType *__capability s{InitValue};", CustomAttrs); } TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { @@ -8137,14 +8247,20 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { verifyFormat("x = (foo *_Nullable)*v;"); verifyFormat("x = (foo *_Null_unspecified)*v;"); verifyFormat("x = (foo *_Nonnull)*v;"); + verifyFormat("x = (foo *[[clang::attr]])*v;"); + verifyFormat("x = (foo *[[clang::attr(\"foo\")]])*v;"); + verifyFormat("x = (foo *__ptr32)*v;"); + verifyFormat("x = (foo *__ptr64)*v;"); + verifyFormat("x = (foo *__capability)*v;"); // Check that we handle 
multiple trailing qualifiers and skip them all to // determine that the expression is a cast to a pointer type. FormatStyle LongPointerRight = getLLVMStyleWithColumns(999); FormatStyle LongPointerLeft = getLLVMStyleWithColumns(999); LongPointerLeft.PointerAlignment = FormatStyle::PAS_Left; - StringRef AllQualifiers = "const volatile restrict __attribute__((foo)) " - "_Nonnull _Null_unspecified _Nonnull"; + StringRef AllQualifiers = + "const volatile restrict __attribute__((foo)) _Nonnull _Null_unspecified " + "_Nonnull [[clang::attr]] __ptr32 __ptr64 __capability"; verifyFormat(("x = (foo *" + AllQualifiers + ")*v;").str(), LongPointerRight); verifyFormat(("x = (foo* " + AllQualifiers + ")*v;").str(), LongPointerLeft); @@ -8152,6 +8268,20 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { verifyFormat("x = (foo *const)&v;"); verifyFormat(("x = (foo *" + AllQualifiers + ")&v;").str(), LongPointerRight); verifyFormat(("x = (foo* " + AllQualifiers + ")&v;").str(), LongPointerLeft); + + // Check custom qualifiers: + FormatStyle CustomQualifier = getLLVMStyleWithColumns(999); + CustomQualifier.AttributeMacros.push_back("__my_qualifier"); + verifyFormat("x = (foo * __my_qualifier) * v;"); // not parsed as qualifier. 
+ verifyFormat("x = (foo *__my_qualifier)*v;", CustomQualifier); + verifyFormat(("x = (foo *" + AllQualifiers + " __my_qualifier)*v;").str(), + CustomQualifier); + verifyFormat(("x = (foo *" + AllQualifiers + " __my_qualifier)&v;").str(), + CustomQualifier); + + // Check that unknown identifiers result in binary operator parsing: + verifyFormat("x = (foo * __unknown_qualifier) * v;"); + verifyFormat("x = (foo * __unknown_qualifier) & v;"); } TEST_F(FormatTest, UnderstandsSquareAttributes) { @@ -8529,6 +8659,12 @@ TEST_F(FormatTest, BreaksLongDeclarations) { "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); verifyFormat("decltype(LoooooooooooooooooooooooooooooooooooooooongName)\n" "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); + verifyFormat("typeof(LoooooooooooooooooooooooooooooooooooooooooongName)\n" + "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); + verifyFormat("_Atomic(LooooooooooooooooooooooooooooooooooooooooongName)\n" + "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); + verifyFormat("__underlying_type(LooooooooooooooooooooooooooooooongName)\n" + "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); verifyFormat("LoooooooooooooooooooooooooooooooooooooooongReturnType\n" "LooooooooooooooooooooooooooongFunctionDeclaration(T... 
t);"); verifyFormat("LoooooooooooooooooooooooooooooooooooooooongReturnType\n" @@ -8878,6 +9014,8 @@ TEST_F(FormatTest, LayoutCxx11BraceInitializers) { verifyFormat("int foo(int i) { return fo1{}(i); }"); verifyFormat("int foo(int i) { return fo1{}(i); }"); verifyFormat("auto i = decltype(x){};"); + verifyFormat("auto i = typeof(x){};"); + verifyFormat("auto i = _Atomic(x){};"); verifyFormat("std::vector v = {1, 0 /* comment */};"); verifyFormat("Node n{1, Node{1000}, //\n" " 2};"); @@ -11470,6 +11608,9 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto i = std::make_unique(5);", NoSpace); verifyFormat("size_t x = sizeof(x);", NoSpace); verifyFormat("auto f(int x) -> decltype(x);", NoSpace); + verifyFormat("auto f(int x) -> typeof(x);", NoSpace); + verifyFormat("auto f(int x) -> _Atomic(x);", NoSpace); + verifyFormat("auto f(int x) -> __underlying_type(x);", NoSpace); verifyFormat("int f(T x) noexcept(x.create());", NoSpace); verifyFormat("alignas(128) char a[128];", NoSpace); verifyFormat("size_t x = alignof(MyType);", NoSpace); @@ -11518,6 +11659,9 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto i = std::make_unique (5);", Space); verifyFormat("size_t x = sizeof (x);", Space); verifyFormat("auto f (int x) -> decltype (x);", Space); + verifyFormat("auto f (int x) -> typeof (x);", Space); + verifyFormat("auto f (int x) -> _Atomic (x);", Space); + verifyFormat("auto f (int x) -> __underlying_type (x);", Space); verifyFormat("int f (T x) noexcept (x.create ());", Space); verifyFormat("alignas (128) char a[128];", Space); verifyFormat("size_t x = alignof (MyType);", Space); @@ -11570,6 +11714,9 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto i = std::make_unique (5);", SomeSpace); verifyFormat("size_t x = sizeof (x);", SomeSpace); verifyFormat("auto f (int x) -> decltype (x);", SomeSpace); + verifyFormat("auto f (int x) -> typeof (x);", SomeSpace); + verifyFormat("auto f (int x) -> _Atomic 
(x);", SomeSpace); + verifyFormat("auto f (int x) -> __underlying_type (x);", SomeSpace); verifyFormat("int f (T x) noexcept (x.create());", SomeSpace); verifyFormat("alignas (128) char a[128];", SomeSpace); verifyFormat("size_t x = alignof (MyType);", SomeSpace); @@ -13749,9 +13896,9 @@ TEST_F(FormatTest, GetsCorrectBasedOnStyle) { CHECK_PARSE_NESTED_BOOL_FIELD(STRUCT, FIELD, #FIELD) #define CHECK_PARSE(TEXT, FIELD, VALUE) \ - EXPECT_NE(VALUE, Style.FIELD); \ + EXPECT_NE(VALUE, Style.FIELD) << "Initial value already the same!"; \ EXPECT_EQ(0, parseConfiguration(TEXT, &Style).value()); \ - EXPECT_EQ(VALUE, Style.FIELD) + EXPECT_EQ(VALUE, Style.FIELD) << "Unexpected value after parsing!" TEST_F(FormatTest, ParsesConfigurationBools) { FormatStyle Style = {}; @@ -14141,6 +14288,12 @@ TEST_F(FormatTest, ParsesConfiguration) { CHECK_PARSE("ForEachMacros: [BOOST_FOREACH, Q_FOREACH]", ForEachMacros, BoostAndQForeach); + Style.AttributeMacros.clear(); + CHECK_PARSE("BasedOnStyle: LLVM", AttributeMacros, + std::vector{"__capability"}); + CHECK_PARSE("AttributeMacros: [attr1, attr2]", AttributeMacros, + std::vector({"attr1", "attr2"})); + Style.StatementMacros.clear(); CHECK_PARSE("StatementMacros: [QUNUSED]", StatementMacros, std::vector{"QUNUSED"}); @@ -14818,6 +14971,9 @@ TEST_F(FormatTest, FormatsLambdas) { "});")); verifyFormat("void f() {\n" " SomeFunction([](decltype(x), A *a) {});\n" + " SomeFunction([](typeof(x), A *a) {});\n" + " SomeFunction([](_Atomic(x), A *a) {});\n" + " SomeFunction([](__underlying_type(x), A *a) {});\n" "}"); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " [](const aaaaaaaaaa &a) { return a; });"); @@ -16448,10 +16604,54 @@ TEST_F(FormatTest, TypenameMacros) { verifyFormat("STACK_OF(LIST(int)) a, b;", Macros); verifyFormat("for (LIST(int) *a = NULL; a;) {\n}", Macros); verifyFormat("STACK_OF(int) f(LIST(int) *arg);", Macros); + verifyFormat("vector x;", Macros); + verifyFormat("vector f(LIST(uint64_t) *arg);", 
Macros); Macros.PointerAlignment = FormatStyle::PAS_Left; verifyFormat("STACK_OF(int)* a;", Macros); verifyFormat("STACK_OF(int*)* a;", Macros); + verifyFormat("x = (STACK_OF(uint64_t))*a;", Macros); + verifyFormat("x = (STACK_OF(uint64_t))&a;", Macros); + verifyFormat("vector x;", Macros); +} + +TEST_F(FormatTest, AtomicQualifier) { + // Check that we treate _Atomic as a type and not a function call + FormatStyle Google = getGoogleStyleWithColumns(0); + verifyFormat("struct foo {\n" + " int a1;\n" + " _Atomic(a) a2;\n" + " _Atomic(_Atomic(int) *const) a3;\n" + "};", + Google); + verifyFormat("_Atomic(uint64_t) a;"); + verifyFormat("_Atomic(uint64_t) *a;"); + verifyFormat("_Atomic(uint64_t const *) *a;"); + verifyFormat("_Atomic(uint64_t *const) *a;"); + verifyFormat("_Atomic(const uint64_t *) *a;"); + verifyFormat("_Atomic(uint64_t) a;"); + verifyFormat("_Atomic(_Atomic(uint64_t)) a;"); + verifyFormat("_Atomic(_Atomic(uint64_t)) a, b;"); + verifyFormat("for (_Atomic(uint64_t) *a = NULL; a;) {\n}"); + verifyFormat("_Atomic(uint64_t) f(_Atomic(uint64_t) *arg);"); + + verifyFormat("_Atomic(uint64_t) *s(InitValue);"); + verifyFormat("_Atomic(uint64_t) *s{InitValue};"); + FormatStyle Style = getLLVMStyle(); + Style.PointerAlignment = FormatStyle::PAS_Left; + verifyFormat("_Atomic(uint64_t)* s(InitValue);", Style); + verifyFormat("_Atomic(uint64_t)* s{InitValue};", Style); + verifyFormat("_Atomic(int)* a;", Style); + verifyFormat("_Atomic(int*)* a;", Style); + verifyFormat("vector<_Atomic(uint64_t)* attr> x;", Style); + + Style.SpacesInCStyleCastParentheses = true; + Style.SpacesInParentheses = false; + verifyFormat("x = ( _Atomic(uint64_t) )*a;", Style); + Style.SpacesInCStyleCastParentheses = false; + Style.SpacesInParentheses = true; + verifyFormat("x = (_Atomic( uint64_t ))*a;", Style); + verifyFormat("x = (_Atomic( uint64_t ))&a;", Style); } TEST_F(FormatTest, AmbersandInLamda) { diff --git a/clang/unittests/Tooling/TransformerTest.cpp 
b/clang/unittests/Tooling/TransformerTest.cpp index 26158b1520f90..a8d6d3dd851da 100644 --- a/clang/unittests/Tooling/TransformerTest.cpp +++ b/clang/unittests/Tooling/TransformerTest.cpp @@ -25,6 +25,7 @@ using ::testing::ElementsAre; using ::testing::IsEmpty; using transformer::cat; using transformer::changeTo; +using transformer::rewriteDescendants; using transformer::RewriteRule; constexpr char KHeaderContents[] = R"cc( @@ -568,6 +569,88 @@ TEST_F(TransformerTest, RewriteDescendantsInvalidNodeType) { EXPECT_EQ(ErrorCount, 1); } +// +// We include one test per typed overload. We don't test extensively since that +// is already covered by the tests above. +// + +TEST_F(TransformerTest, RewriteDescendantsTypedStmt) { + // Add an unrelated definition to the header that also has a variable named + // "x", to test that the rewrite is limited to the scope we intend. + appendToHeader(R"cc(int g(int x) { return x; })cc"); + std::string Input = + "int f(int x) { int y = x; { int z = x * x; } return x; }"; + std::string Expected = + "int f(int x) { int y = 3; { int z = 3 * 3; } return 3; }"; + auto InlineX = + makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); + testRule(makeRule(functionDecl(hasName("f"), hasBody(stmt().bind("body"))), + [&InlineX](const MatchFinder::MatchResult &R) { + const auto *Node = R.Nodes.getNodeAs("body"); + assert(Node != nullptr && "body must be bound"); + return transformer::detail::rewriteDescendants( + *Node, InlineX, R); + }), + Input, Expected); +} + +TEST_F(TransformerTest, RewriteDescendantsTypedDecl) { + std::string Input = + "int f(int x) { int y = x; { int z = x * x; } return x; }"; + std::string Expected = + "int f(int x) { int y = 3; { int z = 3 * 3; } return 3; }"; + auto InlineX = + makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); + testRule(makeRule(functionDecl(hasName("f")).bind("fun"), + [&InlineX](const MatchFinder::MatchResult &R) { + const auto *Node = R.Nodes.getNodeAs("fun"); + 
assert(Node != nullptr && "fun must be bound"); + return transformer::detail::rewriteDescendants( + *Node, InlineX, R); + }), + Input, Expected); +} + +TEST_F(TransformerTest, RewriteDescendantsTypedTypeLoc) { + std::string Input = "int f(int *x) { return *x; }"; + std::string Expected = "int f(char *x) { return *x; }"; + auto IntToChar = + makeRule(typeLoc(loc(qualType(isInteger(), builtinType()))).bind("loc"), + changeTo(cat("char"))); + testRule( + makeRule( + functionDecl( + hasName("f"), + hasParameter(0, varDecl(hasTypeLoc(typeLoc().bind("parmType"))))), + [&IntToChar](const MatchFinder::MatchResult &R) { + const auto *Node = R.Nodes.getNodeAs("parmType"); + assert(Node != nullptr && "parmType must be bound"); + return transformer::detail::rewriteDescendants(*Node, IntToChar, R); + }), + Input, Expected); +} + +TEST_F(TransformerTest, RewriteDescendantsTypedDynTyped) { + // Add an unrelated definition to the header that also has a variable named + // "x", to test that the rewrite is limited to the scope we intend. 
+ appendToHeader(R"cc(int g(int x) { return x; })cc"); + std::string Input = + "int f(int x) { int y = x; { int z = x * x; } return x; }"; + std::string Expected = + "int f(int x) { int y = 3; { int z = 3 * 3; } return 3; }"; + auto InlineX = + makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); + testRule( + makeRule(functionDecl(hasName("f"), hasBody(stmt().bind("body"))), + [&InlineX](const MatchFinder::MatchResult &R) { + auto It = R.Nodes.getMap().find("body"); + assert(It != R.Nodes.getMap().end() && "body must be bound"); + return transformer::detail::rewriteDescendants(It->second, + InlineX, R); + }), + Input, Expected); +} + TEST_F(TransformerTest, InsertBeforeEdit) { std::string Input = R"cc( int f() { @@ -878,10 +961,8 @@ TEST_F(TransformerTest, OrderedRuleMultipleKinds) { } // Verifies that a rule with a top-level matcher for an implicit node (like -// `implicitCastExpr`) does not change the code, when the AST traversal skips -// implicit nodes. In this test, only the rule with the explicit-node matcher -// will fire. -TEST_F(TransformerTest, OrderedRuleImplicitIgnored) { +// `implicitCastExpr`) works correctly -- the implicit nodes are not skipped. +TEST_F(TransformerTest, OrderedRuleImplicitMatched) { std::string Input = R"cc( void f1(); int f2(); @@ -892,7 +973,7 @@ TEST_F(TransformerTest, OrderedRuleImplicitIgnored) { void f1(); int f2(); void call_f1() { REPLACE_F1; } - float call_f2() { return f2(); } + float call_f2() { return REPLACE_F2; } )cc"; RewriteRule ReplaceF1 = @@ -904,32 +985,6 @@ TEST_F(TransformerTest, OrderedRuleImplicitIgnored) { testRule(applyFirst({ReplaceF1, ReplaceF2}), Input, Expected); } -// Verifies that explicitly setting the traversal kind fixes the problem in the -// previous test. 
-TEST_F(TransformerTest, OrderedRuleImplicitMatched) { - std::string Input = R"cc( - void f1(); - int f2(); - void call_f1() { f1(); } - float call_f2() { return f2(); } - )cc"; - std::string Expected = R"cc( - void f1(); - int f2(); - void call_f1() { REPLACE_F1; } - float call_f2() { return REPLACE_F2; } - )cc"; - - RewriteRule ReplaceF1 = makeRule( - traverse(clang::TK_AsIs, callExpr(callee(functionDecl(hasName("f1"))))), - changeTo(cat("REPLACE_F1"))); - RewriteRule ReplaceF2 = - makeRule(traverse(clang::TK_AsIs, - implicitCastExpr(hasSourceExpression(callExpr()))), - changeTo(cat("REPLACE_F2"))); - testRule(applyFirst({ReplaceF1, ReplaceF2}), Input, Expected); -} - // // Negative tests (where we expect no transformation to occur). // diff --git a/clang/unittests/libclang/LibclangTest.cpp b/clang/unittests/libclang/LibclangTest.cpp index e2e3a8e887bab..fc3ad43b495cf 100644 --- a/clang/unittests/libclang/LibclangTest.cpp +++ b/clang/unittests/libclang/LibclangTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-c/Index.h" +#include "clang-c/Rewrite.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" @@ -736,3 +737,196 @@ TEST_F(LibclangSerializationTest, TokenKindsAreCorrectAfterLoading) { CheckTokenKinds(); } + +TEST_F(LibclangParseTest, clang_getVarDeclInitializer) { + std::string Main = "main.cpp"; + WriteFile(Main, "int foo() { return 5; }; const int a = foo();"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + const CXCursor Initializer = clang_Cursor_getVarDeclInitializer(cursor); + EXPECT_FALSE(clang_Cursor_isNull(Initializer)); + CXString Spelling = 
clang_getCursorSpelling(Initializer); + const char* const SpellingCSstr = clang_getCString(Spelling); + EXPECT_TRUE(SpellingCSstr); + EXPECT_EQ(std::string(SpellingCSstr), std::string("foo")); + clang_disposeString(Spelling); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_hasVarDeclGlobalStorageFalse) { + std::string Main = "main.cpp"; + WriteFile(Main, "void foo() { int a; }"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_FALSE(clang_Cursor_hasVarDeclGlobalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclGlobalStorageTrue) { + std::string Main = "main.cpp"; + WriteFile(Main, "int a;"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_TRUE(clang_Cursor_hasVarDeclGlobalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclExternalStorageFalse) { + std::string Main = "main.cpp"; + WriteFile(Main, "int a;"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == 
CXCursor_VarDecl) { + EXPECT_FALSE(clang_Cursor_hasVarDeclExternalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclExternalStorageTrue) { + std::string Main = "main.cpp"; + WriteFile(Main, "extern int a;"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_TRUE(clang_Cursor_hasVarDeclExternalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} +class LibclangRewriteTest : public LibclangParseTest { +public: + CXRewriter Rew = nullptr; + std::string Filename; + CXFile File = nullptr; + + void SetUp() override { + LibclangParseTest::SetUp(); + Filename = "file.cpp"; + WriteFile(Filename, "int main() { return 0; }"); + ClangTU = clang_parseTranslationUnit(Index, Filename.c_str(), nullptr, 0, + nullptr, 0, TUFlags); + Rew = clang_CXRewriter_create(ClangTU); + File = clang_getFile(ClangTU, Filename.c_str()); + } + void TearDown() override { + clang_CXRewriter_dispose(Rew); + LibclangParseTest::TearDown(); + } +}; + +static std::string getFileContent(const std::string& Filename) { + std::ifstream RewrittenFile(Filename); + std::string RewrittenFileContent; + std::string Line; + while (std::getline(RewrittenFile, Line)) { + if (RewrittenFileContent.empty()) + RewrittenFileContent = Line; + else { + RewrittenFileContent += "\n" + Line; + } + } + return RewrittenFileContent; +} + +TEST_F(LibclangRewriteTest, RewriteReplace) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_replaceText(Rew, Rng, 
"MAIN"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int MAIN() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteReplaceShorter) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_replaceText(Rew, Rng, "foo"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int foo() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteReplaceLonger) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_replaceText(Rew, Rng, "patatino"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int patatino() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteInsert) { + CXSourceLocation Loc = clang_getLocation(ClangTU, File, 1, 5); + + clang_CXRewriter_insertTextBefore(Rew, Loc, "ro"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int romain() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteRemove) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_removeText(Rew, Rng); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int () { return 0; }"); +} diff --git a/compiler-rt/.clang-tidy b/compiler-rt/.clang-tidy new file mode 100644 index 0000000000000..4bad5ef216200 --- /dev/null +++ b/compiler-rt/.clang-tidy @@ -0,0 +1,2 @@ +# Checks enabled in the top-level .clang-tidy minus readability-identifier-naming and llvm-header-guard. 
+Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes' diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index c7e86946bcf35..0a0294f937dba 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -544,7 +544,8 @@ add_subdirectory(lib) if(COMPILER_RT_INCLUDE_TESTS) add_subdirectory(unittests) add_subdirectory(test) - if (COMPILER_RT_STANDALONE_BUILD) + # Don't build llvm-lit for runtimes-build, it will clean up map_config. + if (COMPILER_RT_STANDALONE_BUILD AND NOT RUNTIMES_BUILD) # If we have a valid source tree, generate llvm-lit into the bin directory. # The user can still choose to have the check targets *use* a different lit # by specifying -DLLVM_EXTERNAL_LIT, but we generate it regardless. diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 039605d5ca21e..7334b7200fc4c 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -71,16 +71,11 @@ static AsanAllocator &get_allocator(); static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { - // 1-st 8 bytes. - u32 chunk_state : 8; // Must be first. - u32 alloc_tid : 24; - - u32 free_tid : 24; - u32 from_memalign : 1; - u32 alloc_type : 2; - u32 rz_log : 3; - u32 lsan_tag : 2; - // 2-nd 8 bytes + atomic_uint8_t chunk_state; + u8 from_memalign : 1; + u8 alloc_type : 2; + u8 rz_log : 3; + u8 lsan_tag : 2; // This field is used for small sizes. For large sizes it is equal to // SizeClassMap::kMaxSize and the actual size is stored in the // SecondaryAllocator's metadata. @@ -88,12 +83,14 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - u32 alloc_context_id; + u32 alloc_tid; + atomic_uint32_t alloc_context_id; }; struct ChunkBase : ChunkHeader { // Header2, intersects with user memory. 
u32 free_context_id; + u32 free_tid; }; static const uptr kChunkHeaderSize = sizeof(ChunkHeader); @@ -101,14 +98,15 @@ static const uptr kChunkHeader2Size = sizeof(ChunkBase) - kChunkHeaderSize; COMPILER_CHECK(kChunkHeaderSize == 16); COMPILER_CHECK(kChunkHeader2Size <= 16); -// Every chunk of memory allocated by this allocator can be in one of 3 states: -// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated. -// CHUNK_ALLOCATED: the chunk is allocated and not yet freed. -// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. enum { - CHUNK_AVAILABLE = 0, // 0 is the default value even if we didn't set it. - CHUNK_ALLOCATED = 2, - CHUNK_QUARANTINE = 3 + // Either just allocated by underlying allocator, but AsanChunk is not yet + // ready, or almost returned to undelying allocator and AsanChunk is already + // meaningless. + CHUNK_INVALID = 0, + // The chunk is allocated and not yet freed. + CHUNK_ALLOCATED = 2, + // The chunk was freed and put into quarantine zone. 
+ CHUNK_QUARANTINE = 3, }; struct AsanChunk: ChunkBase { @@ -117,7 +115,7 @@ struct AsanChunk: ChunkBase { if (user_requested_size != SizeClassMap::kMaxSize) return user_requested_size; return *reinterpret_cast( - get_allocator().GetMetaData(AllocBeg(locked_version))); + get_allocator().GetMetaData(AllocBeg(locked_version))); } void *AllocBeg(bool locked_version = false) { if (from_memalign) { @@ -140,8 +138,12 @@ struct QuarantineCallback { } void Recycle(AsanChunk *m) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - atomic_store((atomic_uint8_t*)m, CHUNK_AVAILABLE, memory_order_relaxed); + u8 old_chunk_state = CHUNK_QUARANTINE; + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, + CHUNK_INVALID, memory_order_acquire)) { + CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); + } + CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -301,22 +303,25 @@ struct Allocator { // housekeeping chunk, like TransferBatch. Start by assuming the former. AsanChunk *ac = GetAsanChunk((void *)chunk); uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); - uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); - uptr chunk_end = chunk + allocated_size; - if (chunk < beg && beg < end && end <= chunk_end && - ac->chunk_state == CHUNK_ALLOCATED) { - // Looks like a valid AsanChunk in use, poison redzones only. - PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); - uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); - FastPoisonShadowPartialRightRedzone( - end_aligned_down, end - end_aligned_down, - chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); - } else { - // This is either not an AsanChunk or freed or quarantined AsanChunk. - // In either case, poison everything. 
- PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); + if (atomic_load(&ac->chunk_state, memory_order_acquire) == + CHUNK_ALLOCATED) { + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + return; + } } + + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } void ReInitialize(const AllocatorOptions &options) { @@ -381,14 +386,17 @@ struct Allocator { AsanChunk *right_chunk) { // Prefer an allocated chunk over freed chunk and freed chunk // over available chunk. - if (left_chunk->chunk_state != right_chunk->chunk_state) { - if (left_chunk->chunk_state == CHUNK_ALLOCATED) + u8 left_state = atomic_load(&left_chunk->chunk_state, memory_order_relaxed); + u8 right_state = + atomic_load(&right_chunk->chunk_state, memory_order_relaxed); + if (left_state != right_state) { + if (left_state == CHUNK_ALLOCATED) return left_chunk; - if (right_chunk->chunk_state == CHUNK_ALLOCATED) + if (right_state == CHUNK_ALLOCATED) return right_chunk; - if (left_chunk->chunk_state == CHUNK_QUARANTINE) + if (left_state == CHUNK_QUARANTINE) return left_chunk; - if (right_chunk->chunk_state == CHUNK_QUARANTINE) + if (right_state == CHUNK_QUARANTINE) return right_chunk; } // Same chunk_state: choose based on offset. 
@@ -403,9 +411,10 @@ struct Allocator { bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { AsanChunk *m = GetAsanChunkByAddr(addr); if (!m) return false; - if (m->chunk_state != CHUNK_ALLOCATED) return false; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return false; if (m->Beg() != addr) return false; - atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), memory_order_relaxed); return true; } @@ -504,10 +513,9 @@ struct Allocator { u32 alloc_tid = t ? t->tid() : 0; m->alloc_tid = alloc_tid; CHECK_EQ(alloc_tid, m->alloc_tid); // Does alloc_tid fit into the bitfield? - m->free_tid = kInvalidTid; m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { - CHECK_LE(alloc_beg+ 2 * sizeof(uptr), chunk_beg); + CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); reinterpret_cast(alloc_beg)[0] = kAllocBegMagic; reinterpret_cast(alloc_beg)[1] = chunk_beg; } @@ -524,7 +532,8 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - m->alloc_context_id = StackDepotPut(*stack); + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), + memory_order_relaxed); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -557,7 +566,7 @@ struct Allocator { : __lsan::kDirectlyLeaked; #endif // Must be the last mutation of metadata in this function. - atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); + atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release); ASAN_MALLOC_HOOK(res, size); return res; } @@ -565,10 +574,10 @@ struct Allocator { // Set quarantine flag if chunk is allocated, issue ASan error report on // available and quarantined chunks. Return true on success, false otherwise. 
bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr, - BufferedStackTrace *stack) { + BufferedStackTrace *stack) { u8 old_chunk_state = CHUNK_ALLOCATED; // Flip the chunk_state atomically to avoid race on double-free. - if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state, + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { ReportInvalidFree(ptr, old_chunk_state, stack); @@ -576,16 +585,17 @@ struct Allocator { return false; } CHECK_EQ(CHUNK_ALLOCATED, old_chunk_state); + // It was a user data. + m->free_tid = kInvalidTid; + m->free_context_id = 0; return true; } // Expects the chunk to already be marked as quarantined by using // AtomicallySetQuarantineFlagIfAllocated. void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - CHECK_GE(m->alloc_tid, 0); - if (SANITIZER_WORDSIZE == 64) // On 32-bits this resides in user area. - CHECK_EQ(m->free_tid, kInvalidTid); + CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), + CHUNK_QUARANTINE); AsanThread *t = GetCurrentThread(); m->free_tid = t ? t->tid() : 0; m->free_context_id = StackDepotPut(*stack); @@ -677,7 +687,7 @@ struct Allocator { void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); if (new_ptr) { - u8 chunk_state = m->chunk_state; + u8 chunk_state = atomic_load(&m->chunk_state, memory_order_acquire); if (chunk_state != CHUNK_ALLOCATED) ReportInvalidFree(old_ptr, chunk_state, stack); CHECK_NE(REAL(memcpy), nullptr); @@ -721,7 +731,8 @@ struct Allocator { // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). 
AsanChunk *GetAsanChunk(void *alloc_beg) { - if (!alloc_beg) return nullptr; + if (!alloc_beg) + return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); @@ -730,15 +741,20 @@ struct Allocator { uptr *alloc_magic = reinterpret_cast(alloc_beg); if (alloc_magic[0] == kAllocBegMagic) return reinterpret_cast(alloc_magic[1]); + // FIXME: This is either valid small chunk with tiny redzone or invalid + // chunk which is beeing allocated/deallocated. The latter case should + // return nullptr like secondary allocator does. return reinterpret_cast(alloc_beg); } AsanChunk *GetAsanChunkDebug(void *alloc_beg) { - if (!alloc_beg) return nullptr; + if (!alloc_beg) + return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); - Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, m); + Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, + m); return m; } uptr *alloc_magic = reinterpret_cast(alloc_beg); @@ -751,7 +767,6 @@ struct Allocator { return reinterpret_cast(alloc_beg); } - AsanChunk *GetAsanChunkByAddr(uptr p) { void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); return GetAsanChunk(alloc_beg); @@ -767,14 +782,16 @@ struct Allocator { AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) { void *alloc_beg = allocator.GetBlockBeginFastLockedDebug(reinterpret_cast(p)); - Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, alloc_beg); + Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, + alloc_beg); return GetAsanChunkDebug(alloc_beg); } uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; - if (m->chunk_state != CHUNK_ALLOCATED) return 0; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return 0; if (m->Beg() != p) return 
0; return m->UsedSize(); } @@ -840,13 +857,16 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != + CHUNK_INVALID; } bool AsanChunkView::IsAllocated() const { - return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_ALLOCATED; } bool AsanChunkView::IsQuarantined() const { - return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_QUARANTINE; } uptr AsanChunkView::Beg() const { return chunk_->Beg(); } uptr AsanChunkView::End() const { return Beg() + UsedSize(); } @@ -855,7 +875,9 @@ u32 AsanChunkView::UserRequestedAlignment() const { return Allocator::ComputeUserAlignment(chunk_->user_requested_alignment_log); } uptr AsanChunkView::AllocTid() const { return chunk_->alloc_tid; } -uptr AsanChunkView::FreeTid() const { return chunk_->free_tid; } +uptr AsanChunkView::FreeTid() const { + return IsQuarantined() ? chunk_->free_tid : kInvalidTid; +} AllocType AsanChunkView::GetAllocType() const { return (AllocType)chunk_->alloc_type; } @@ -867,8 +889,12 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } -u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } -u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; } +u32 AsanChunkView::GetAllocStackId() const { + return atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); +} +u32 AsanChunkView::GetFreeStackId() const { + return IsQuarantined() ? 
chunk_->free_context_id : 0; +} StackTrace AsanChunkView::GetAllocStack() const { return GetStackTraceFromId(GetAllocStackId()); @@ -1032,7 +1058,7 @@ void AsanSoftRssLimitExceededCallback(bool limit_exceeded) { instance.SetRssLimitExceeded(limit_exceeded); } -} // namespace __asan +} // namespace __asan // --- Implementation of LSan-specific functions --- {{{1 namespace __lsan { @@ -1052,10 +1078,10 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) { uptr PointsIntoChunk(void* p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); - if (!m) return 0; - uptr chunk = m->Beg(); - if (m->chunk_state != __asan::CHUNK_ALLOCATED) + if (!m || atomic_load(&m->chunk_state, memory_order_acquire) != + __asan::CHUNK_ALLOCATED) return 0; + uptr chunk = m->Beg(); if (m->AddrIsInside(addr, /*locked_version=*/true)) return chunk; if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), @@ -1069,7 +1095,8 @@ extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; void GetUserBeginDebug(uptr chunk) { Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); + __asan::AsanChunk *m = + __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); Printf("GetUserBeginDebug2 m %p\n", m); } @@ -1096,7 +1123,8 @@ LsanMetadata::LsanMetadata(uptr chunk) { bool LsanMetadata::allocated() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->chunk_state == __asan::CHUNK_ALLOCATED; + return atomic_load(&m->chunk_state, memory_order_relaxed) == + __asan::CHUNK_ALLOCATED; } ChunkTag LsanMetadata::tag() const { @@ -1116,7 +1144,7 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->alloc_context_id; + return atomic_load(&m->alloc_context_id, memory_order_relaxed); } void 
ForEachChunk(ForEachChunkCallback callback, void *arg) { @@ -1127,14 +1155,15 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); if (!m) return kIgnoreObjectInvalid; - if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { + if ((atomic_load(&m->chunk_state, memory_order_acquire) == + __asan::CHUNK_ALLOCATED) && + m->AddrIsInside(addr)) { if (m->lsan_tag == kIgnored) return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; return kIgnoreObjectSuccess; - } else { - return kIgnoreObjectInvalid; } + return kIgnoreObjectInvalid; } } // namespace __lsan diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index b37d8ef4e8d29..d60b97500a3c3 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -171,7 +171,7 @@ template struct AP32 { static const uptr kSpaceBeg = 0; static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; - static const uptr kMetadataSize = 16; + static const uptr kMetadataSize = 0; typedef __asan::SizeClassMap SizeClassMap; static const uptr kRegionSizeLog = 20; using AddressSpaceView = AddressSpaceViewTy; diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index a2e38238a97c6..1e78cb1b0e77a 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -89,8 +89,7 @@ def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]): def open_llvm_symbolizer(self): cmd = [self.symbolizer_path, - '--use-symbol-table=true', - '--demangle=%s' % demangle, + ('--demangle' if demangle else '--no-demangle'), '--functions=linkage', '--inlines', '--default-arch=%s' % self.default_arch] diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index 72e02e613de50..29e31f55d4998 100644 --- 
a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -33,7 +33,7 @@ uintptr_t GetCurrentProcess(void); #include #endif -#if defined(__OpenBSD__) && defined(__mips__) +#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__)) // clang-format off #include #include @@ -58,7 +58,7 @@ void __clear_cache(void *start, void *end) { #elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) FlushInstructionCache(GetCurrentProcess(), start, end - start); #elif defined(__arm__) && !defined(__APPLE__) -#if defined(__FreeBSD__) || defined(__NetBSD__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) struct arm_sync_icache_args arg; arg.addr = (uintptr_t)start; diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h index 54d1e09ec6df0..daea4f5213b18 100644 --- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -18,6 +18,7 @@ #include "FuzzerSHA1.h" #include "FuzzerTracePC.h" #include +#include #include #include #include @@ -26,6 +27,7 @@ namespace fuzzer { struct InputInfo { Unit U; // The actual input data. + std::chrono::microseconds TimeOfUnit; uint8_t Sha1[kSHA1NumBytes]; // Checksum. // Number of features that this input has and no smaller input has. size_t NumFeatures = 0; @@ -33,6 +35,7 @@ struct InputInfo { // Stats. size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; + bool NeverReduce = false; bool MayDeleteFile = false; bool Reduced = false; bool HasFocusFunction = false; @@ -61,11 +64,15 @@ struct InputInfo { } // Assign more energy to a high-entropy seed, i.e., that reveals more - // information about the globally rare features in the neighborhood - // of the seed. Since we do not know the entropy of a seed that has - // never been executed we assign fresh seeds maximum entropy and - // let II->Energy approach the true entropy from above. 
- void UpdateEnergy(size_t GlobalNumberOfFeatures) { + // information about the globally rare features in the neighborhood of the + // seed. Since we do not know the entropy of a seed that has never been + // executed we assign fresh seeds maximum entropy and let II->Energy approach + // the true entropy from above. If ScalePerExecTime is true, the computed + // entropy is scaled based on how fast this input executes compared to the + // average execution time of inputs. The faster an input executes, the more + // energy gets assigned to the input. + void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime, + std::chrono::microseconds AverageUnitExecutionTime) { Energy = 0.0; SumIncidence = 0; @@ -88,6 +95,27 @@ struct InputInfo { // Normalize. if (SumIncidence != 0) Energy = (Energy / SumIncidence) + logl(SumIncidence); + + if (ScalePerExecTime) { + // Scaling to favor inputs with lower execution time. + uint32_t PerfScore = 100; + if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10) + PerfScore = 10; + else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4) + PerfScore = 25; + else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2) + PerfScore = 50; + else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4) + PerfScore = 75; + else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count()) + PerfScore = 300; + else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count()) + PerfScore = 200; + else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count()) + PerfScore = 150; + + Energy *= PerfScore; + } } // Increment the frequency of the feature Idx. 
@@ -120,6 +148,7 @@ struct EntropicOptions { bool Enabled; size_t NumberOfRarestFeatures; size_t FeatureFrequencyThreshold; + bool ScalePerExecTime; }; class InputCorpus { @@ -177,7 +206,8 @@ class InputCorpus { bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, - bool HasFocusFunction, + bool HasFocusFunction, bool NeverReduce, + std::chrono::microseconds TimeOfUnit, const Vector &FeatureSet, const DataFlowTrace &DFT, const InputInfo *BaseII) { assert(!U.empty()); @@ -187,6 +217,8 @@ class InputCorpus { InputInfo &II = *Inputs.back(); II.U = U; II.NumFeatures = NumFeatures; + II.NeverReduce = NeverReduce; + II.TimeOfUnit = TimeOfUnit; II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; @@ -268,6 +300,15 @@ class InputCorpus { return II; } + InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) { + if (!UniformDist) { + return ChooseUnitToMutate(Rand); + } + InputInfo &II = *Inputs[Rand(Inputs.size())]; + assert(!II.U.empty()); + return II; + } + // Returns an index of random unit from the corpus to mutate. 
size_t ChooseUnitIdxToMutate(Random &Rand) { UpdateCorpusDistribution(Rand); @@ -460,12 +501,19 @@ class InputCorpus { Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); + std::chrono::microseconds AverageUnitExecutionTime(0); + for (auto II : Inputs) { + AverageUnitExecutionTime += II->TimeOfUnit; + } + AverageUnitExecutionTime /= N; + bool VanillaSchedule = true; if (Entropic.Enabled) { for (auto II : Inputs) { if (II->NeedsEnergyUpdate && II->Energy != 0.0) { II->NeedsEnergyUpdate = false; - II->UpdateEnergy(RareFeatures.size()); + II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime, + AverageUnitExecutionTime); } } diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index 4669b12786fc2..caafd1dbb0a7b 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -250,11 +250,26 @@ static void WorkerThread(const Command &BaseCmd, std::atomic *Counter, } } -static void ValidateDirectoryExists(const std::string &Path) { - if (!Path.empty() && !IsDirectory(Path)) { - Printf("ERROR: The required directory \"%s\" does not exist\n", Path.c_str()); +static void ValidateDirectoryExists(const std::string &Path, + bool CreateDirectory) { + if (Path.empty()) { + Printf("ERROR: Provided directory path is an empty string\n"); exit(1); } + + if (IsDirectory(Path)) + return; + + if (CreateDirectory) { + if (!MkDirRecursive(Path)) { + Printf("ERROR: Failed to create directory \"%s\"\n", Path.c_str()); + exit(1); + } + return; + } + + Printf("ERROR: The required directory \"%s\" does not exist\n", Path.c_str()); + exit(1); } std::string CloneArgsWithoutX(const Vector &Args, @@ -656,6 +671,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.LenControl = Flags.len_control; + Options.KeepSeed = Flags.keep_seed; Options.UnitTimeoutSec = Flags.timeout; Options.ErrorExitCode = 
Flags.error_exitcode; Options.TimeoutExitCode = Flags.timeout_exitcode; @@ -664,6 +680,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.IgnoreCrashes = Flags.ignore_crashes; Options.MaxTotalTimeSec = Flags.max_total_time; Options.DoCrossOver = Flags.cross_over; + Options.CrossOverUniformDist = Flags.cross_over_uniform_dist; Options.MutateDepth = Flags.mutate_depth; Options.ReduceDepth = Flags.reduce_depth; Options.UseCounters = Flags.use_counters; @@ -691,7 +708,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { std::string OutputCorpusDir = (*Inputs)[0]; if (!IsFile(OutputCorpusDir)) { Options.OutputCorpus = OutputCorpusDir; - ValidateDirectoryExists(Options.OutputCorpus); + ValidateDirectoryExists(Options.OutputCorpus, Flags.create_missing_dirs); } } Options.ReportSlowUnits = Flags.report_slow_units; @@ -705,11 +722,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { if (!IsSeparator(ArtifactPathDir[ArtifactPathDir.length() - 1])) { ArtifactPathDir = DirName(ArtifactPathDir); } - ValidateDirectoryExists(ArtifactPathDir); + ValidateDirectoryExists(ArtifactPathDir, Flags.create_missing_dirs); } if (Flags.exact_artifact_path) { Options.ExactArtifactPath = Flags.exact_artifact_path; - ValidateDirectoryExists(DirName(Options.ExactArtifactPath)); + ValidateDirectoryExists(DirName(Options.ExactArtifactPath), + Flags.create_missing_dirs); } Vector Dictionary; if (Flags.dict) @@ -735,7 +753,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.DataFlowTrace = Flags.data_flow_trace; if (Flags.features_dir) { Options.FeaturesDir = Flags.features_dir; - ValidateDirectoryExists(Options.FeaturesDir); + ValidateDirectoryExists(Options.FeaturesDir, Flags.create_missing_dirs); } if (Flags.collect_data_flow) Options.CollectDataFlow = Flags.collect_data_flow; @@ -746,6 +764,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { 
(size_t)Flags.entropic_feature_frequency_threshold; Options.EntropicNumberOfRarestFeatures = (size_t)Flags.entropic_number_of_rarest_features; + Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time; if (Options.Entropic) { if (!Options.FocusFunction.empty()) { Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " @@ -761,6 +780,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Entropic.FeatureFrequencyThreshold = Options.EntropicFeatureFrequencyThreshold; Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures; + Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime; unsigned Seed = Flags.seed; // Initialize Seed. diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index 832224a705d2b..fdb8362cef9d4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -23,7 +23,21 @@ FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, " FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files " "to use as an additional seed corpus. Alternatively, an \"@\" followed by " "the name of a file containing the comma-separated list.") +FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if " + "they do not produce new coverage. When used with |reduce_inputs==1|, the " + "seed inputs will never be reduced. This option can be useful when seeds are " + "not properly formed for the fuzz target but still have useful snippets.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") +FUZZER_FLAG_INT(cross_over_uniform_dist, 0, "Experimental. If 1, use a " + "uniform probability distribution when choosing inputs to cross over with. " + "Some of the inputs in the corpus may never get chosen for mutation " + "depending on the input mutation scheduling policy. 
With this flag, all " + "inputs, regardless of the input mutation scheduling policy, can be chosen " + "as an input to cross over with. This can be particularly useful with " + "|keep_seed==1|; all the initial seed inputs, even though they do not " + "increase coverage because they are not properly formed, will still be " + "chosen as an input to cross over with.") + FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") FUZZER_FLAG_INT(reduce_depth, 0, "Experimental/internal. " @@ -161,9 +175,18 @@ FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If " "entropic is enabled, we keep track of the frequencies only for the " "Top-X least abundant features (union features that are considered as " "rare).") +FUZZER_FLAG_INT(entropic_scale_per_exec_time, 0, "Experimental. If 1, " + "the Entropic power schedule gets scaled based on the input execution " + "time. Inputs with lower execution time get scheduled more (up to 30x). " + "Note that, if 1, fuzzer stops from being deterministic even if a " + "non-zero random seed is given.") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace") FUZZER_FLAG_STRING(collect_data_flow, "Experimental: collect the data flow trace") + +FUZZER_FLAG_INT(create_missing_dirs, 0, "Automatically attempt to create " + "directories for arguments that would normally expect them to already " + "exist (i.e. 
artifact_prefix, exact_artifact_path, features_dir, corpus)") diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp index d9e6b79443e0d..84725d22a9c78 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -309,11 +309,15 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, else Env.MainCorpusDir = CorpusDirs[0]; - auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); - CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, - {}, &Env.Cov, - CFPath, false); - RemoveFile(CFPath); + if (Options.KeepSeed) { + for (auto &File : SeedFiles) + Env.Files.push_back(File.File); + } else { + auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); + CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, + {}, &Env.Cov, CFPath, false); + RemoveFile(CFPath); + } Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp index cbb1dbe1b86d2..c3330c3425d09 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp @@ -144,6 +144,38 @@ void VPrintf(bool Verbose, const char *Fmt, ...) 
{ fflush(OutputFile); } +static bool MkDirRecursiveInner(const std::string &Leaf) { + // Prevent chance of potential infinite recursion + if (Leaf == ".") + return true; + + const std::string &Dir = DirName(Leaf); + + if (IsDirectory(Dir)) { + MkDir(Leaf); + return IsDirectory(Leaf); + } + + bool ret = MkDirRecursiveInner(Dir); + if (!ret) { + // Give up early if a previous MkDir failed + return ret; + } + + MkDir(Leaf); + return IsDirectory(Leaf); +} + +bool MkDirRecursive(const std::string &Dir) { + if (Dir.empty()) + return false; + + if (IsDirectory(Dir)) + return true; + + return MkDirRecursiveInner(Dir); +} + void RmDirRecursive(const std::string &Dir) { IterateDirRecursive( Dir, [](const std::string &Path) {}, diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index 8def2e96304e7..6e3a0b470c5f6 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -64,6 +64,7 @@ size_t FileSize(const std::string &Path); void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector *V, bool TopDir); +bool MkDirRecursive(const std::string &Dir); void RmDirRecursive(const std::string &Dir); // Iterate files and dirs inside Dir, recursively. diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h index 31096ce804bc1..2b172d9122277 100644 --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -67,7 +67,8 @@ class Fuzzer { void ExecuteCallback(const uint8_t *Data, size_t Size); bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, - InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr); + InputInfo *II = nullptr, bool ForceAddToCorpus = false, + bool *FoundUniqFeatures = nullptr); // Merge Corpora[1:] into Corpora[0]. 
void Merge(const Vector &Corpora); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index 02db6d27b0a3e..f9986dd8eea51 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -464,11 +464,13 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir, } bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, - InputInfo *II, bool *FoundUniqFeatures) { + InputInfo *II, bool ForceAddToCorpus, + bool *FoundUniqFeatures) { if (!Size) return false; ExecuteCallback(Data, Size); + auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime); UniqFeatureSetTmp.clear(); size_t FoundUniqFeaturesOfII = 0; @@ -478,7 +480,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, UniqFeatureSetTmp.push_back(Feature); if (Options.Entropic) Corpus.UpdateFeatureFrequency(II, Feature); - if (Options.ReduceInputs && II) + if (Options.ReduceInputs && II && !II->NeverReduce) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) FoundUniqFeaturesOfII++; @@ -487,11 +489,12 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, *FoundUniqFeatures = FoundUniqFeaturesOfII; PrintPulseAndReportSlowInput(Data, Size); size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; - if (NumNewFeatures) { + if (NumNewFeatures || ForceAddToCorpus) { TPC.UpdateObservedPCs(); - auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, - MayDeleteFile, TPC.ObservedFocusFunction(), - UniqFeatureSetTmp, DFT, II); + auto NewII = + Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, + TPC.ObservedFocusFunction(), ForceAddToCorpus, + TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); return true; @@ -664,8 +667,11 @@ void Fuzzer::MutateAndTestOne() { MD.StartMutationSequence(); auto &II = 
Corpus.ChooseUnitToMutate(MD.GetRand()); - if (Options.DoCrossOver) - MD.SetCrossOverWith(&Corpus.ChooseUnitToMutate(MD.GetRand()).U); + if (Options.DoCrossOver) { + auto &CrossOverII = Corpus.ChooseUnitToCrossOverWith( + MD.GetRand(), Options.CrossOverUniformDist); + MD.SetCrossOverWith(&CrossOverII.U); + } const auto &U = II.U; memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); assert(CurrentUnitData); @@ -700,7 +706,7 @@ void Fuzzer::MutateAndTestOne() { bool FoundUniqFeatures = false; bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, - &FoundUniqFeatures); + /*ForceAddToCorpus*/ false, &FoundUniqFeatures); TryDetectingAMemoryLeak(CurrentUnitData, Size, /*DuringInitialCorpusExecution*/ false); if (NewCov) { @@ -768,7 +774,9 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector &CorporaFiles) { for (auto &SF : CorporaFiles) { auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false); assert(U.size() <= MaxInputLen); - RunOne(U.data(), U.size()); + RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr, + /*ForceAddToCorpus*/ Options.KeepSeed, + /*FoundUniqFeatures*/ nullptr); CheckExitOnSrcPosOrItem(); TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index b75e7c7af7093..b17a7474d38f0 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -18,6 +18,7 @@ struct FuzzingOptions { int Verbosity = 1; size_t MaxLen = 0; size_t LenControl = 1000; + bool KeepSeed = false; int UnitTimeoutSec = 300; int TimeoutExitCode = 70; int OOMExitCode = 71; @@ -30,6 +31,7 @@ struct FuzzingOptions { int RssLimitMb = 0; int MallocLimitMb = 0; bool DoCrossOver = true; + bool CrossOverUniformDist = false; int MutateDepth = 5; bool ReduceDepth = false; bool UseCounters = false; @@ -47,6 +49,7 @@ struct FuzzingOptions { bool Entropic = false; size_t EntropicFeatureFrequencyThreshold = 
0xFF; size_t EntropicNumberOfRarestFeatures = 100; + bool EntropicScalePerExecTime = false; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index 0e9435ab8fcb4..d2b5cbb7d57d0 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -592,13 +592,17 @@ TEST(FuzzerUtil, Base64) { TEST(Corpus, Distribution) { DataFlowTrace DFT; Random Rand(0); - struct EntropicOptions Entropic = {false, 0xFF, 100}; + struct EntropicOptions Entropic = {false, 0xFF, 100, false}; std::unique_ptr C(new InputCorpus("", Entropic)); size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) - C->AddToCorpus(Unit{static_cast(i)}, 1, false, false, {}, DFT, - nullptr); + C->AddToCorpus(Unit{static_cast(i)}, /*NumFeatures*/ 1, + /*MayDeleteFile*/ false, /*HasFocusFunction*/ false, + /*ForceAddToCorpus*/ false, + /*TimeOfUnit*/ std::chrono::microseconds(0), + /*FeatureSet*/ {}, DFT, + /*BaseII*/ nullptr); Vector Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { @@ -1056,7 +1060,7 @@ TEST(Entropic, UpdateFrequency) { const size_t FeatIdx1 = 0, FeatIdx2 = 42, FeatIdx3 = 12, FeatIdx4 = 26; size_t Index; // Create input corpus with default entropic configuration - struct EntropicOptions Entropic = {true, 0xFF, 100}; + struct EntropicOptions Entropic = {true, 0xFF, 100, false}; std::unique_ptr C(new InputCorpus("", Entropic)); std::unique_ptr II(new InputInfo()); @@ -1093,23 +1097,23 @@ double SubAndSquare(double X, double Y) { TEST(Entropic, ComputeEnergy) { const double Precision = 0.01; - struct EntropicOptions Entropic = {true, 0xFF, 100}; + struct EntropicOptions Entropic = {true, 0xFF, 100, false}; std::unique_ptr C(new InputCorpus("", Entropic)); std::unique_ptr II(new InputInfo()); Vector> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}}; II->FeatureFreqs = 
FeatureFreqs; II->NumExecutedMutations = 0; - II->UpdateEnergy(4); + II->UpdateEnergy(4, false, std::chrono::microseconds(0)); EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision); II->NumExecutedMutations = 9; - II->UpdateEnergy(5); + II->UpdateEnergy(5, false, std::chrono::microseconds(0)); EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision); II->FeatureFreqs[0].second++; II->FeatureFreqs.push_back(std::pair(42, 6)); II->NumExecutedMutations = 20; - II->UpdateEnergy(10); + II->UpdateEnergy(10, false, std::chrono::microseconds(0)); EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision); } diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 53b9a3e563e94..4c98bb4861f20 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -3229,9 +3229,19 @@ TEST(MemorySanitizer, dlopenFailed) { #if !defined(__FreeBSD__) && !defined(__NetBSD__) TEST(MemorySanitizer, sched_getaffinity) { cpu_set_t mask; - int res = sched_getaffinity(getpid(), sizeof(mask), &mask); - ASSERT_EQ(0, res); - EXPECT_NOT_POISONED(mask); + if (sched_getaffinity(getpid(), sizeof(mask), &mask) == 0) + EXPECT_NOT_POISONED(mask); + else { + // The call to sched_getaffinity() may have failed because the Affinity + // mask is too small for the number of CPUs on the system (i.e. the + // system has more than 1024 CPUs). Allocate a mask large enough for + // twice as many CPUs. 
+ cpu_set_t *DynAffinity; + DynAffinity = CPU_ALLOC(2048); + int res = sched_getaffinity(getpid(), CPU_ALLOC_SIZE(2048), DynAffinity); + ASSERT_EQ(0, res); + EXPECT_NOT_POISONED(*DynAffinity); + } } #endif diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index fa4b951383324..d57fdbae5371d 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -210,22 +210,6 @@ static void write_64bit_value(uint64_t i) { write_32bit_value(hi); } -static uint32_t length_of_string(const char *s) { - return (strlen(s) / 4) + 1; -} - -// Remove when we support libgcov 9 current_working_directory. -#if !defined(_MSC_VER) && defined(__clang__) -__attribute__((unused)) -#endif -static void -write_string(const char *s) { - uint32_t len = length_of_string(s); - write_32bit_value(len); - write_bytes(s, strlen(s)); - write_bytes("\0\0\0\0", 4 - (strlen(s) % 4)); -} - static uint32_t read_32bit_value() { uint32_t val; @@ -632,6 +616,9 @@ void llvm_writeout_files(void) { // __attribute__((destructor)) and destructors whose priorities are greater than // 100 run before this function and can thus be tracked. The priority is // compatible with GCC 7 onwards. 
+#if __GNUC__ >= 9 +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif __attribute__((destructor(100))) #endif static void llvm_writeout_and_clear(void) { diff --git a/compiler-rt/lib/sanitizer_common/.clang-tidy b/compiler-rt/lib/sanitizer_common/.clang-tidy deleted file mode 100644 index 6c71abff0d382..0000000000000 --- a/compiler-rt/lib/sanitizer_common/.clang-tidy +++ /dev/null @@ -1,16 +0,0 @@ -Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming' -CheckOptions: - - key: readability-identifier-naming.ClassCase - value: CamelCase - - key: readability-identifier-naming.EnumCase - value: CamelCase - - key: readability-identifier-naming.FunctionCase - value: CamelCase - - key: readability-identifier-naming.UnionCase - value: CamelCase - - key: readability-identifier-naming.GlobalConstantCase - value: CamelCase - - key: readability-identifier-naming.GlobalConstantPrefix - value: "k" - - key: readability-identifier-naming.VariableCase - value: lower_case diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h index 6d73784d77d09..0cf483da1e5c8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h @@ -148,7 +148,6 @@ class CombinedAllocator { return secondary_.GetBlockBeginFastLocked(p); } - uptr GetActuallyAllocatedSize(void *p) { if (primary_.PointerIsMine(p)) return primary_.GetActuallyAllocatedSize(p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h index 47cc42cb411c2..2c25a687c5f08 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h @@ -153,6 +153,7 @@ class SizeClassAllocator32 { } void *GetMetaData(const void *p) { + CHECK(kMetadataSize); CHECK(PointerIsMine(p)); 
uptr mem = reinterpret_cast(p); uptr beg = ComputeRegionBeg(mem); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index acc61cc6ba8dd..a6126fc6265eb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -203,7 +203,8 @@ class SizeClassAllocator64 { uptr class_id = GetSizeClass(p); uptr size = ClassIdToSize(class_id); Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size); - if (!size) return nullptr; + if (!size) + return nullptr; uptr chunk_idx = GetChunkIdx((uptr)p, size); uptr reg_beg = GetRegionBegin(p); uptr beg = chunk_idx * size; @@ -212,16 +213,16 @@ class SizeClassAllocator64 { "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p " "kNumClasses %p\n", chunk_idx, reg_beg, beg, next_beg, kNumClasses); - if (class_id >= kNumClasses) return nullptr; + if (class_id >= kNumClasses) + return nullptr; const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id)); Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region, region->mapped_user); if (region->mapped_user >= next_beg) - return reinterpret_cast(reg_beg + beg); + return reinterpret_cast(reg_beg + beg); return nullptr; } - uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); return ClassIdToSize(GetSizeClass(p)); @@ -230,6 +231,7 @@ class SizeClassAllocator64 { static uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); } void *GetMetaData(const void *p) { + CHECK(kMetadataSize); uptr class_id = GetSizeClass(p); uptr size = ClassIdToSize(class_id); uptr chunk_idx = GetChunkIdx(reinterpret_cast(p), size); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h index d0ffc79b06107..84973eedda60a 100644 --- 
a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -333,14 +333,10 @@ void NORETURN CheckFailed(const char *file, int line, const char *cond, #define UNIMPLEMENTED() UNREACHABLE("unimplemented") -#define COMPILER_CHECK(pred) IMPL_COMPILER_ASSERT(pred, __LINE__) +#define COMPILER_CHECK(pred) static_assert(pred, "") #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) -#define IMPL_PASTE(a, b) a##b -#define IMPL_COMPILER_ASSERT(pred, line) \ - typedef char IMPL_PASTE(assertion_failed_##_, line)[2*(int)(pred)-1] - // Limits for integral types. We have to redefine it in case we don't // have stdint.h (like in Visual Studio 9). #undef __INT64_C diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 6ca00c29ab732..465e581cf5134 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -306,7 +306,7 @@ class Allocator { void *Block = nullptr; uptr ClassId = 0; - uptr SecondaryBlockEnd; + uptr SecondaryBlockEnd = 0; if (LIKELY(PrimaryT::canAllocate(NeededSize))) { ClassId = SizeClassMap::getClassIdBySize(NeededSize); DCHECK_NE(ClassId, 0U); diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index b5bb53ddcf2d9..da435fd86adc8 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -75,11 +75,6 @@ template class MapAllocatorCache { public: - // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length - // arrays are an extension for some compilers. - // FIXME(kostyak): support (partially) the cache on Fuchsia. - static_assert(!SCUDO_FUCHSIA || EntriesArraySize == 0U, ""); - // Ensure the default maximum specified fits the array. 
static_assert(DefaultMaxEntriesCount <= EntriesArraySize, ""); @@ -392,9 +387,9 @@ void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, } const uptr CommitSize = MapEnd - PageSize - CommitBase; - const uptr Ptr = - reinterpret_cast(map(reinterpret_cast(CommitBase), - CommitSize, "scudo:secondary", 0, &Data)); + const uptr Ptr = reinterpret_cast( + map(reinterpret_cast(CommitBase), CommitSize, "scudo:secondary", + MAP_RESIZABLE, &Data)); LargeBlock::Header *H = reinterpret_cast(Ptr); H->MapBase = MapBase; H->MapSize = MapEnd - MapBase; diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 9689c4265e06c..481158308c434 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,8 @@ static bool Ready; static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; -static void disableDebuggerdMaybe() { +// Fuchsia complains that the function is not used. +UNUSED static void disableDebuggerdMaybe() { #if SCUDO_ANDROID // Disable the debuggerd signal handler on Android, without this we can end // up spending a significant amount of time creating tombstones. 
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 29efdb3060128..d9f2d2fcb95f1 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -56,18 +56,12 @@ template static void testSecondaryBasic(void) { TEST(ScudoSecondaryTest, SecondaryBasic) { testSecondaryBasic>(); -#if !SCUDO_FUCHSIA testSecondaryBasic>>(); testSecondaryBasic< scudo::MapAllocator>>(); -#endif } -#if SCUDO_FUCHSIA -using LargeAllocator = scudo::MapAllocator; -#else using LargeAllocator = scudo::MapAllocator>; -#endif // This exercises a variety of combinations of size and alignment for the // MapAllocator. The size computation done here mimic the ones done by the diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp index b56cc2dab7044..cbbb7ecb2397e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp @@ -240,10 +240,10 @@ SANITIZER_WEAK_IMPORT void dispatch_barrier_async_and_wait( SANITIZER_WEAK_IMPORT void dispatch_barrier_async_and_wait_f( dispatch_queue_t queue, void *context, dispatch_function_t work); -DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false) DISPATCH_INTERCEPT_SYNC_F(dispatch_async_and_wait_f, false) -DISPATCH_INTERCEPT_SYNC_B(dispatch_barrier_async_and_wait, true) +DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false) DISPATCH_INTERCEPT_SYNC_F(dispatch_barrier_async_and_wait_f, true) +DISPATCH_INTERCEPT_SYNC_B(dispatch_barrier_async_and_wait, true) #endif diff --git a/compiler-rt/test/.clang-tidy b/compiler-rt/test/.clang-tidy new file mode 100644 index 0000000000000..612bd0ee8de8a --- /dev/null +++ b/compiler-rt/test/.clang-tidy @@ -0,0 +1 @@ +Checks: '-*' diff --git a/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c 
b/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c index 952d6fcdd4656..d72b0ba8a8bb3 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c +++ b/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c @@ -4,11 +4,11 @@ // RUN: llvm-objdump -s %t | FileCheck %s // Check that "Hello.\n" is in __asan_cstring and not in __cstring. -// CHECK: Contents of section __asan_cstring: +// CHECK: Contents of section {{.*}}__asan_cstring: // CHECK: 48656c6c {{.*}} Hello. -// CHECK: Contents of section __const: +// CHECK: Contents of section {{.*}}__const: // CHECK-NOT: 48656c6c {{.*}} Hello. -// CHECK: Contents of section __cstring: +// CHECK: Contents of section {{.*}}__cstring: // CHECK-NOT: 48656c6c {{.*}} Hello. int main(int argc, char *argv[]) { diff --git a/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp new file mode 100644 index 0000000000000..ec3a6906d3886 --- /dev/null +++ b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp @@ -0,0 +1,33 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Tests whether scaling the Entropic scheduling weight based on input execution +// time is effective or not. Inputs of size 10 will take at least 100 +// microseconds more than any input of size 1-9. The input of size 2 in the +// corpus should be favored by the exec-time-scaled Entropic scheduling policy +// than the input of size 10 in the corpus, eventually finding the crashing +// input {0xab, 0xcd} with less executions. +#include +#include +#include + +static volatile int Sink; +static volatile int *Nil = nullptr; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 10) + return 0; // To make the test quicker. 
+ + if (Size == 10) { + size_t ExecTimeUSec = 100; + std::this_thread::sleep_for(std::chrono::microseconds(ExecTimeUSec)); + + Sink = 0; // execute a lot slower than the crashing input below. + } + + if (Size == 2 && Data[0] == 0xab && Data[1] == 0xcd) + *Nil = 42; // crash. + + return 0; +} diff --git a/compiler-rt/test/fuzzer/KeepSeedTest.cpp b/compiler-rt/test/fuzzer/KeepSeedTest.cpp new file mode 100644 index 0000000000000..f343161abde52 --- /dev/null +++ b/compiler-rt/test/fuzzer/KeepSeedTest.cpp @@ -0,0 +1,37 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Test whether the fuzzer can find "SELECT FROM WHERE", given a seed input +// "SELECTxFROMxWHERE". Without -keep_seed=1, it takes longer to +// find the desired string, because the seed input is more likely to be reduced +// to a prefix of the given input first, losing useful fragments towards the end +// of the seed input. 
+#include +#include +#include +#include + +static volatile int Sink = 0; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 17) + return 0; + + if (Size >= 6 && Data[0] == 'S' && Data[1] == 'E' && Data[2] == 'L' && + Data[3] == 'E' && Data[4] == 'C' && Data[5] == 'T') { + if (Size >= 7 && Data[6] == ' ') { + if (Size >= 11 && Data[7] == 'F' && Data[8] == 'R' && Data[9] == 'O' && + Data[10] == 'M') { + if (Size >= 12 && Data[11] == ' ') { + if (Size >= 17 && Data[12] == 'W' && Data[13] == 'H' && + Data[14] == 'E' && Data[15] == 'R' && Data[16] == 'E') { + fprintf(stderr, "BINGO; Found the target, exiting.\n"); + exit(1); + } + } + } + } + } + return 0; +} diff --git a/compiler-rt/test/fuzzer/cross_over_uniform_dist.test b/compiler-rt/test/fuzzer/cross_over_uniform_dist.test new file mode 100644 index 0000000000000..0dff5fd628f37 --- /dev/null +++ b/compiler-rt/test/fuzzer/cross_over_uniform_dist.test @@ -0,0 +1,16 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-CrossOverUniformDistTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n "@SELECT" > %t-corpus/A +RUN: echo -n "@FROM WHERE" > %t-corpus/B + +RUN: not %run %t-CrossOverUniformDistTest -keep_seed=1 -cross_over_uniform_dist=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n "@SELECT" > %t-corpus/A +RUN: echo -n "@FROM WHERE" > %t-corpus/B +RUN: %run %t-CrossOverUniformDistTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 diff --git a/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test new file mode 100644 index 0000000000000..d34550f9c951f --- /dev/null +++ b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test @@ -0,0 +1,8 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest +RUN: not %run 
%t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=100000 -max_len=10 + +# The following test is added as a comment here for reference, which should +# take more runs than with -entropic_scale_per_exec_time=1 to find the crash. +# (it takes 126,633 runs) +# RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10 diff --git a/compiler-rt/test/fuzzer/fuzzer-dirs.test b/compiler-rt/test/fuzzer/fuzzer-dirs.test index 2bf2a8b143300..c822c2f95c305 100644 --- a/compiler-rt/test/fuzzer/fuzzer-dirs.test +++ b/compiler-rt/test/fuzzer/fuzzer-dirs.test @@ -16,6 +16,7 @@ RUN: %run %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG LONG: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 8192 bytes RUN: rm -rf %t/SUB1 +# Verify error message prints to console when directory does not exist RUN: rm -rf %t.dir && mkdir -p %t.dir RUN: not %run %t-SimpleTest -artifact_prefix=%t.dir/NONEXISTENT_DIR/ 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX RUN: not %run %t-SimpleTest -artifact_prefix=%t.dir/NONEXISTENT_DIR/myprefix 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX @@ -23,3 +24,40 @@ RUN: not %run %t-SimpleTest -features_dir=%t.dir/NONEXISTENT_DIR/ 2>&1 | FileChe RUN: not %run %t-SimpleTest %t.dir/NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX RUN: not %run %t-SimpleTest -exact_artifact_path=%t.dir/NONEXISTENT_DIR/myprefix 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX NONEXISTENT_DIR_RGX: ERROR: The required directory "{{.*/NONEXISTENT_DIR/?}}" does not exist + +# Verify error message prints to console when given directory is an empty +# string +RUN: not %run %t-SimpleTest "" 2>&1 | FileCheck %s --check-prefix=INVALID_DIR_RGX +INVALID_DIR_RGX: ERROR: Provided directory path is an empty string + +# Verify error message prints to console when directory creation fails +# For platforms without functioning chmod (i.e. 
Windows), use a forbidden +# character in the directory name. +RUN: rm -rf %t.dir && mkdir -p %t.dir/access_restricted +RUN: chmod u-w %t.dir/access_restricted || true +RUN: not %run %t-SimpleTest -create_missing_dirs=1 %t.dir/access_restricted/?corpus? 2>&1 | FileCheck %s --check-prefix=DIR_CREATION_FAILURE +DIR_CREATION_FAILURE: ERROR: Failed to create directory "{{.*/access_restricted/\?corpus\?}}" + +# Verify directories and sub-directories are created when -create_missing_dirs=1 +RUN: not %run %t-SimpleTest -create_missing_dirs=1 -artifact_prefix=%t.dir/subdira/./././artifacts/ -features_dir=%t.dir/subdirb/dummy_dir/././../subdirb/features/ %t.dir/subdirc/corpus +RUN: test -e %t.dir/subdira/artifacts/ +RUN: test -e %t.dir/subdirb/subdirb/features/ +RUN: test -e %t.dir/subdirc/corpus/ +RUN: test -e %t.dir/subdirb/dummy_dir + +# Verify directories and sub-directories are created for exact_artifact_path +# when -create_missing_dirs=1 +RUN: not %run %t-SimpleTest -create_missing_dirs=1 -exact_artifact_path=%t.dir/subdird/exact_artifacts/abc +RUN: test -e %t.dir/subdird/exact_artifacts/abc + +# Verify directories and sub-directories are created for artifact_prefix when +# it's referring to a file name prefix and -create_missing_dirs=1 +RUN: not %run %t-SimpleTest -create_missing_dirs=1 -artifact_prefix=%t.dir/subdire/myprefix +RUN: test -e %t.dir/subdire/ && not test -e %t.dir/subdire/myprefix + +# Verify directories are created when referring to relative paths and +# -create_missing_dirs=1 +RUN: cd %t.dir && not %run %t-SimpleTest -create_missing_dirs=1 -artifact_prefix=cwd_artifacts/ -features_dir=cwd_features/subdirtest/ ./cwd_corpus +RUN: test -e %t.dir/cwd_artifacts/ +RUN: test -e %t.dir/cwd_features/subdirtest/ +RUN: test -e %t.dir/cwd_corpus/ diff --git a/compiler-rt/test/fuzzer/keep-seed.test b/compiler-rt/test/fuzzer/keep-seed.test new file mode 100644 index 0000000000000..29212ac7c177c --- /dev/null +++ b/compiler-rt/test/fuzzer/keep-seed.test @@ -0,0 
+1,17 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-KeepSeedTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments + +RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: rm -rf %t-corpus-baseline +RUN: mkdir %t-corpus-baseline +RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments + +# The following checks whether without -keep_seed=1 libFuzzer does not find the +# crashing input "SELECT FROM WHERE" even with 2x more runs. +RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1 diff --git a/flang/README.md b/flang/README.md index fafc1f91a421f..3a58c277bacf3 100644 --- a/flang/README.md +++ b/flang/README.md @@ -159,7 +159,7 @@ make test check-all To run individual regression tests llvm-lit needs to know the lit configuration for flang. The parameters in charge of this are: -flang_site_config and flang_config. And they can be set as shown bellow: +flang_site_config and flang_config. And they can be set as shown below: ``` /llvm-lit \ --param flang_site_config=/test-lit/lit.site.cfg.py \ @@ -214,9 +214,11 @@ To generate doxygen-style documentation from source code cd ~/llvm-project/build cmake -DLLVM_ENABLE_DOXYGEN=ON -DFLANG_INCLUDE_DOCS=ON ../llvm make doxygen-flang +``` It will generate html in +``` /tools/flang/docs/doxygen/html # for flang docs ``` ## Generate Sphinx-based Documentation @@ -227,17 +229,18 @@ is mostly meant to be processed by the Sphinx documentation generation system to create HTML pages which would be hosted on the webpage of flang and updated periodically. -If you would like to generate and view the HTML locally, install -Sphinx and then: - +If you would like to generate and view the HTML locally: +- Install [Sphinx](http://sphinx-doc.org/), including the [sphinx-markdown-tables](https://pypi.org/project/sphinx-markdown-tables/) extension. 
- Pass `-DLLVM_ENABLE_SPHINX=ON -DSPHINX_WARNINGS_AS_ERRORS=OFF` to the cmake command. ``` cd ~/llvm-project/build cmake -DLLVM_ENABLE_SPHINX=ON -DSPHINX_WARNINGS_AS_ERRORS=OFF ../llvm make docs-flang-html +``` It will generate html in +``` $BROWSER /tools/flang/docs/html/ ``` diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index a3260400a9bf7..027927f67dfd4 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -56,6 +56,7 @@ Extensions, deletions, and legacy features supported by default * `NAME=` as synonym for `FILE=` * Data edit descriptors without width or other details * `D` lines in fixed form as comments or debug code +* `CARRIAGECONTROL=` on the OPEN and INQUIRE statements * `CONVERT=` on the OPEN and INQUIRE statements * `DISPOSE=` on the OPEN and INQUIRE statements * Leading semicolons are ignored before any statement that diff --git a/flang/docs/doxygen-mainpage.dox b/flang/docs/doxygen-mainpage.dox index 78469e31312cf..e4bc3f6602425 100644 --- a/flang/docs/doxygen-mainpage.dox +++ b/flang/docs/doxygen-mainpage.dox @@ -6,7 +6,7 @@ /// This documentation describes the **internal** software that makes /// up flang, not the **external** use of flang. There are no instructions /// here on how to use flang, only the APIs that make up the software. For -/// usage instructions, please see the [project website](https://github.com/flang-compiler/f18) +/// usage instructions, please see the [project website](https://github.com/llvm-project/flang) /// for further detail. 
/// /// \section main_caveat Caveat diff --git a/flang/docs/f2018-grammar.txt b/flang/docs/f2018-grammar.txt index 2de8cdfc1b8f7..9b2819d69c724 100644 --- a/flang/docs/f2018-grammar.txt +++ b/flang/docs/f2018-grammar.txt @@ -577,7 +577,8 @@ R1205 connect-spec -> POSITION = scalar-default-char-expr | RECL = scalar-int-expr | ROUND = scalar-default-char-expr | SIGN = scalar-default-char-expr | STATUS = scalar-default-char-expr - @ | CONVERT = scalar-default-char-expr + @ | CARRIAGECONTROL = scalar-default-char-expr + | CONVERT = scalar-default-char-expr | DISPOSE = scalar-default-char-expr R1206 file-name-expr -> scalar-default-char-expr R1207 iomsg-variable -> scalar-default-char-variable @@ -657,7 +658,8 @@ R1231 inquire-spec -> STREAM = scalar-default-char-variable | STATUS = scalar-default-char-variable | WRITE = scalar-default-char-variable - @ | CONVERT = scalar-default-char-expr + @ | CARRIAGECONTROL = scalar-default-char-expr + | CONVERT = scalar-default-char-expr | DISPOSE = scalar-default-char-expr R1301 format-stmt -> FORMAT format-specification diff --git a/flang/include/flang/Common/Fortran-features.h b/flang/include/flang/Common/Fortran-features.h index ebf7a8d9d6230..23c2e95fd5648 100644 --- a/flang/include/flang/Common/Fortran-features.h +++ b/flang/include/flang/Common/Fortran-features.h @@ -22,14 +22,14 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines, DoubleComplex, Byte, StarKind, QuadPrecision, SlashInitialization, TripletInArrayConstructor, MissingColons, SignedComplexLiteral, OldStyleParameter, ComplexConstructor, PercentLOC, SignedPrimary, FileName, - Convert, Dispose, IOListLeadingComma, AbbreviatedEditDescriptor, - ProgramParentheses, PercentRefAndVal, OmitFunctionDummies, CrayPointer, - Hollerith, ArithmeticIF, Assign, AssignedGOTO, Pause, OpenACC, OpenMP, - CruftAfterAmpersand, ClassicCComments, AdditionalFormats, BigIntLiterals, - RealDoControls, EquivalenceNumericWithCharacter, AdditionalIntrinsics, - AnonymousParents, 
OldLabelDoEndStatements, LogicalIntegerAssignment, - EmptySourceFile, ProgramReturn, ImplicitNoneTypeNever, - ImplicitNoneTypeAlways) + Carriagecontrol, Convert, Dispose, IOListLeadingComma, + AbbreviatedEditDescriptor, ProgramParentheses, PercentRefAndVal, + OmitFunctionDummies, CrayPointer, Hollerith, ArithmeticIF, Assign, + AssignedGOTO, Pause, OpenACC, OpenMP, CruftAfterAmpersand, ClassicCComments, + AdditionalFormats, BigIntLiterals, RealDoControls, + EquivalenceNumericWithCharacter, AdditionalIntrinsics, AnonymousParents, + OldLabelDoEndStatements, LogicalIntegerAssignment, EmptySourceFile, + ProgramReturn, ImplicitNoneTypeNever, ImplicitNoneTypeAlways) using LanguageFeatures = EnumSet; diff --git a/flang/include/flang/Common/Fortran.h b/flang/include/flang/Common/Fortran.h index df6b27c8ce3b1..5d5ab324e826e 100644 --- a/flang/include/flang/Common/Fortran.h +++ b/flang/include/flang/Common/Fortran.h @@ -52,6 +52,7 @@ ENUM_CLASS(IoSpecKind, Access, Action, Advance, Asynchronous, Blank, Decimal, Id, Iomsg, Iostat, Name, Named, Newunit, Nextrec, Nml, Number, Opened, Pad, Pending, Pos, Position, Read, Readwrite, Rec, Recl, Round, Sequential, Sign, Size, Status, Stream, Unformatted, Unit, Write, + Carriagecontrol, // nonstandard Convert, // nonstandard Dispose, // nonstandard ) diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h index aee7a0ef5bd8d..ebaffaa4a6e0e 100644 --- a/flang/include/flang/Lower/Bridge.h +++ b/flang/include/flang/Lower/Bridge.h @@ -34,7 +34,7 @@ namespace evaluate { class IntrinsicProcTable; } // namespace evaluate namespace parser { -class CookedSource; +class AllCookedSources; struct Program; } // namespace parser namespace semantics { @@ -55,8 +55,8 @@ class LoweringBridge { static LoweringBridge create(const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds, const Fortran::evaluate::IntrinsicProcTable &intrinsics, - const Fortran::parser::CookedSource &cooked) { - return 
LoweringBridge{defaultKinds, intrinsics, cooked}; + const Fortran::parser::AllCookedSources &allCooked) { + return LoweringBridge{defaultKinds, intrinsics, allCooked}; } //===--------------------------------------------------------------------===// @@ -71,7 +71,7 @@ class LoweringBridge { const Fortran::evaluate::IntrinsicProcTable &getIntrinsicTable() const { return intrinsics; } - const Fortran::parser::CookedSource *getCookedSource() const { + const Fortran::parser::AllCookedSources *getCookedSource() const { return cooked; } @@ -99,13 +99,13 @@ class LoweringBridge { explicit LoweringBridge( const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds, const Fortran::evaluate::IntrinsicProcTable &intrinsics, - const Fortran::parser::CookedSource &cooked); + const Fortran::parser::AllCookedSources &); LoweringBridge() = delete; LoweringBridge(const LoweringBridge &) = delete; const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds; const Fortran::evaluate::IntrinsicProcTable &intrinsics; - const Fortran::parser::CookedSource *cooked; + const Fortran::parser::AllCookedSources *cooked; std::unique_ptr context; std::unique_ptr module; fir::KindMapping kindMap; diff --git a/flang/include/flang/Lower/ConvertType.h b/flang/include/flang/Lower/ConvertType.h index f4046efba1127..b807d62038186 100644 --- a/flang/include/flang/Lower/ConvertType.h +++ b/flang/include/flang/Lower/ConvertType.h @@ -48,11 +48,6 @@ template class Type; } // namespace evaluate -namespace parser { -class CharBlock; -class CookedSource; -} // namespace parser - namespace semantics { class Symbol; } // namespace semantics diff --git a/flang/include/flang/Parser/instrumented-parser.h b/flang/include/flang/Parser/instrumented-parser.h index 51dbd5f03c177..1bc1c526dc9f7 100644 --- a/flang/include/flang/Parser/instrumented-parser.h +++ b/flang/include/flang/Parser/instrumented-parser.h @@ -31,7 +31,7 @@ class ParsingLog { bool Fails(const char *at, const MessageFixedText &tag, ParseState &); 
void Note(const char *at, const MessageFixedText &tag, bool pass, const ParseState &); - void Dump(llvm::raw_ostream &, const CookedSource &) const; + void Dump(llvm::raw_ostream &, const AllCookedSources &) const; private: struct LogForPosition { diff --git a/flang/include/flang/Parser/message.h b/flang/include/flang/Parser/message.h index 46a72e08a237d..cd1df0a968e72 100644 --- a/flang/include/flang/Parser/message.h +++ b/flang/include/flang/Parser/message.h @@ -186,14 +186,14 @@ class Message : public common::ReferenceCounted { bool SortBefore(const Message &that) const; bool IsFatal() const; std::string ToString() const; - std::optional GetProvenanceRange(const CookedSource &) const; - void Emit(llvm::raw_ostream &, const CookedSource &, + std::optional GetProvenanceRange( + const AllCookedSources &) const; + void Emit(llvm::raw_ostream &, const AllCookedSources &, bool echoSourceLine = true) const; - // If this Message or any of its attachments locates itself via a CharBlock - // within a particular CookedSource, replace its location with the - // corresponding ProvenanceRange. - void ResolveProvenances(const CookedSource &); + // If this Message or any of its attachments locates itself via a CharBlock, + // replace its location with the corresponding ProvenanceRange. 
+ void ResolveProvenances(const AllCookedSources &); bool IsMergeable() const { return std::holds_alternative(text_); @@ -255,8 +255,8 @@ class Messages { bool Merge(const Message &); void Merge(Messages &&); void Copy(const Messages &); - void ResolveProvenances(const CookedSource &); - void Emit(llvm::raw_ostream &, const CookedSource &cooked, + void ResolveProvenances(const AllCookedSources &); + void Emit(llvm::raw_ostream &, const AllCookedSources &, bool echoSourceLines = true) const; void AttachTo(Message &); bool AnyFatalError() const; diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 7f9984bc50481..166e573b5cec3 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -2549,7 +2549,8 @@ using FileNameExpr = ScalarDefaultCharExpr; // POSITION = scalar-default-char-expr | RECL = scalar-int-expr | // ROUND = scalar-default-char-expr | SIGN = scalar-default-char-expr | // STATUS = scalar-default-char-expr -// @ | CONVERT = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable // | DISPOSE = scalar-default-char-variable WRAPPER_CLASS(StatusExpr, ScalarDefaultCharExpr); WRAPPER_CLASS(ErrLabel, Label); @@ -2559,7 +2560,7 @@ struct ConnectSpec { struct CharExpr { ENUM_CLASS(Kind, Access, Action, Asynchronous, Blank, Decimal, Delim, Encoding, Form, Pad, Position, Round, Sign, - /* extensions: */ Convert, Dispose) + /* extensions: */ Carriagecontrol, Convert, Dispose) TUPLE_CLASS_BOILERPLATE(CharExpr); std::tuple t; }; @@ -2767,7 +2768,8 @@ WRAPPER_CLASS(FlushStmt, std::list); // STATUS = scalar-default-char-variable | // UNFORMATTED = scalar-default-char-variable | // WRITE = scalar-default-char-variable -// @ | CONVERT = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable // | DISPOSE = scalar-default-char-variable 
struct InquireSpec { UNION_CLASS_BOILERPLATE(InquireSpec); @@ -2775,7 +2777,7 @@ struct InquireSpec { ENUM_CLASS(Kind, Access, Action, Asynchronous, Blank, Decimal, Delim, Direct, Encoding, Form, Formatted, Iomsg, Name, Pad, Position, Read, Readwrite, Round, Sequential, Sign, Stream, Status, Unformatted, Write, - /* extensions: */ Convert, Dispose) + /* extensions: */ Carriagecontrol, Convert, Dispose) TUPLE_CLASS_BOILERPLATE(CharVar); std::tuple t; }; diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h index 9f8bff9e1d70a..6594f97088d58 100644 --- a/flang/include/flang/Parser/parsing.h +++ b/flang/include/flang/Parser/parsing.h @@ -40,15 +40,17 @@ struct Options { class Parsing { public: - explicit Parsing(AllSources &); + explicit Parsing(AllCookedSources &); ~Parsing(); bool consumedWholeFile() const { return consumedWholeFile_; } const char *finalRestingPlace() const { return finalRestingPlace_; } - CookedSource &cooked() { return cooked_; } + AllCookedSources &allCooked() { return allCooked_; } Messages &messages() { return messages_; } std::optional &parseTree() { return parseTree_; } + const CookedSource &cooked() const { return DEREF(currentCooked_); } + const SourceFile *Prescan(const std::string &path, Options); void DumpCookedChars(llvm::raw_ostream &) const; void DumpProvenance(llvm::raw_ostream &) const; @@ -58,13 +60,14 @@ class Parsing { void EmitMessage(llvm::raw_ostream &o, const char *at, const std::string &message, bool echoSourceLine = false) const { - cooked_.allSources().EmitMessage( - o, cooked_.GetProvenanceRange(CharBlock(at)), message, echoSourceLine); + allCooked_.allSources().EmitMessage(o, + allCooked_.GetProvenanceRange(CharBlock(at)), message, echoSourceLine); } private: Options options_; - CookedSource cooked_; + AllCookedSources &allCooked_; + CookedSource *currentCooked_{nullptr}; Messages messages_; bool consumedWholeFile_{false}; const char *finalRestingPlace_{nullptr}; diff --git 
a/flang/include/flang/Parser/provenance.h b/flang/include/flang/Parser/provenance.h index b543cd7d7b4ef..52aac931e8995 100644 --- a/flang/include/flang/Parser/provenance.h +++ b/flang/include/flang/Parser/provenance.h @@ -17,6 +17,7 @@ #include "flang/Common/interval.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -213,28 +214,22 @@ class AllSources { Encoding encoding_{Encoding::UTF_8}; }; +// Represents the result of preprocessing and prescanning a single source +// file (and all its inclusions) or module file. Parsers operate within +// single instances of CookedSource. class CookedSource { public: - explicit CookedSource(AllSources &); - ~CookedSource(); - - AllSources &allSources() { return allSources_; } - const AllSources &allSources() const { return allSources_; } const std::string &data() const { return data_; } - bool IsValid(const char *p) const { + bool Contains(const char *p) const { return p >= &data_.front() && p <= &data_.back() + 1; } - bool IsValid(CharBlock range) const { - return !range.empty() && IsValid(range.begin()) && IsValid(range.end() - 1); + bool Contains(CharBlock range) const { + return !range.empty() && Contains(range.begin()) && + Contains(range.end() - 1); } - bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } std::optional GetProvenanceRange(CharBlock) const; - std::optional GetCharBlockFromLineAndColumns( - int line, int startColumn, int endColumn) const; - std::optional> - GetSourcePositionRange(CharBlock) const; std::optional GetCharBlock(ProvenanceRange) const; // The result of a Put() is the offset that the new data @@ -256,17 +251,51 @@ class CookedSource { } std::size_t BufferedBytes() const; - void Marshal(); // marshals text into one contiguous block - void CompileProvenanceRangeToOffsetMappings(); + void Marshal(AllSources &); // marshals text into one contiguous block + void CompileProvenanceRangeToOffsetMappings(AllSources &); std::string AcquireData() { 
return std::move(data_); } llvm::raw_ostream &Dump(llvm::raw_ostream &) const; private: - AllSources &allSources_; CharBuffer buffer_; // before Marshal() std::string data_; // all of it, prescanned and preprocessed OffsetToProvenanceMappings provenanceMap_; ProvenanceRangeToOffsetMappings invertedMap_; }; + +class AllCookedSources { +public: + explicit AllCookedSources(AllSources &); + ~AllCookedSources(); + + AllSources &allSources() { return allSources_; } + const AllSources &allSources() const { return allSources_; } + + CookedSource &NewCookedSource(); + + template // const char * or CharBlock + const CookedSource *Find(A x) const { + for (const auto &c : cooked_) { + if (c.Contains(x)) { + return &c; + } + } + return nullptr; + } + + bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } + + std::optional GetProvenanceRange(CharBlock) const; + std::optional GetCharBlockFromLineAndColumns( + int line, int startColumn, int endColumn) const; + std::optional> + GetSourcePositionRange(CharBlock) const; + std::optional GetCharBlock(ProvenanceRange) const; + void Dump(llvm::raw_ostream &) const; + +private: + AllSources &allSources_; + std::list cooked_; // owns all CookedSource instances +}; } // namespace Fortran::parser #endif // FORTRAN_PARSER_PROVENANCE_H_ diff --git a/flang/include/flang/Parser/user-state.h b/flang/include/flang/Parser/user-state.h index 75757d2f305a6..6a4cf9736f1ff 100644 --- a/flang/include/flang/Parser/user-state.h +++ b/flang/include/flang/Parser/user-state.h @@ -26,7 +26,7 @@ namespace Fortran::parser { -class CookedSource; +class AllCookedSources; class ParsingLog; class ParseState; @@ -34,10 +34,11 @@ class Success {}; // for when one must return something that's present class UserState { public: - UserState(const CookedSource &cooked, common::LanguageFeatureControl features) - : cooked_{cooked}, features_{features} {} + UserState(const AllCookedSources &allCooked, + common::LanguageFeatureControl features) + : 
allCooked_{allCooked}, features_{features} {} - const CookedSource &cooked() const { return cooked_; } + const AllCookedSources &allCooked() const { return allCooked_; } const common::LanguageFeatureControl &features() const { return features_; } llvm::raw_ostream *debugOutput() const { return debugOutput_; } @@ -89,7 +90,7 @@ class UserState { } private: - const CookedSource &cooked_; + const AllCookedSources &allCooked_; llvm::raw_ostream *debugOutput_{nullptr}; diff --git a/flang/include/flang/Semantics/scope.h b/flang/include/flang/Semantics/scope.h index 5ebe5f32eb677..853d7044f7fd5 100644 --- a/flang/include/flang/Semantics/scope.h +++ b/flang/include/flang/Semantics/scope.h @@ -187,10 +187,6 @@ class Scope { const DeclTypeSpec &MakeTypeStarType(); const DeclTypeSpec &MakeClassStarType(); - // For modules read from module files, this is the stream of characters - // that are referenced by SourceName objects. - void set_chars(parser::CookedSource &); - std::size_t size() const { return size_; } void set_size(std::size_t size) { size_ = size; } std::size_t alignment() const { return alignment_; } @@ -245,7 +241,6 @@ class Scope { mapType crayPointers_; std::map> submodules_; std::list declTypeSpecs_; - std::string chars_; std::optional importKind_; std::set importNames_; DerivedTypeSpec *derivedTypeSpec_{nullptr}; // dTS->scope() == this diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index 3c7ba98f66204..4c2c0e75992a4 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -30,7 +30,7 @@ class IntrinsicTypeDefaultKinds; namespace Fortran::parser { struct Name; struct Program; -class CookedSource; +class AllCookedSources; struct AssociateConstruct; struct BlockConstruct; struct CaseConstruct; @@ -60,7 +60,7 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const 
common::LanguageFeatureControl &, parser::AllSources &); + const common::LanguageFeatureControl &, parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -89,7 +89,7 @@ class SemanticsContext { Scope &globalScope() { return globalScope_; } parser::Messages &messages() { return messages_; } evaluate::FoldingContext &foldingContext() { return foldingContext_; } - parser::AllSources &allSources() { return allSources_; } + parser::AllCookedSources &allCookedSources() { return allCookedSources_; } SemanticsContext &set_location( const std::optional &location) { @@ -179,7 +179,7 @@ class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const common::LanguageFeatureControl languageFeatures_; - parser::AllSources &allSources_; + parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; std::string moduleDirectory_{"."s}; @@ -204,8 +204,8 @@ class SemanticsContext { class Semantics { public: explicit Semantics(SemanticsContext &context, parser::Program &program, - parser::CookedSource &cooked, bool debugModuleWriter = false) - : context_{context}, program_{program}, cooked_{cooked} { + const parser::CookedSource &cooked, bool debugModuleWriter = false) + : context_{context}, program_{program} { context.set_debugModuleWriter(debugModuleWriter); context.globalScope().AddSourceRange(parser::CharBlock{cooked.data()}); } @@ -223,7 +223,6 @@ class Semantics { private: SemanticsContext &context_; parser::Program &program_; - const parser::CookedSource &cooked_; }; // Base class for semantics checkers. 
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index 50dc5c80df6ac..3f79b79e32ee9 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -60,12 +60,12 @@ static constexpr std::tuple< mkIOKey(OutputComplex64), mkIOKey(OutputComplex32), mkIOKey(OutputAscii), mkIOKey(InputAscii), mkIOKey(OutputLogical), mkIOKey(InputLogical), mkIOKey(SetAccess), mkIOKey(SetAction), mkIOKey(SetAsynchronous), - mkIOKey(SetEncoding), mkIOKey(SetForm), mkIOKey(SetPosition), - mkIOKey(SetRecl), mkIOKey(SetStatus), mkIOKey(SetFile), mkIOKey(GetNewUnit), - mkIOKey(GetSize), mkIOKey(GetIoLength), mkIOKey(GetIoMsg), - mkIOKey(InquireCharacter), mkIOKey(InquireLogical), - mkIOKey(InquirePendingId), mkIOKey(InquireInteger64), - mkIOKey(EndIoStatement)> + mkIOKey(SetCarriagecontrol), mkIOKey(SetEncoding), mkIOKey(SetForm), + mkIOKey(SetPosition), mkIOKey(SetRecl), mkIOKey(SetStatus), + mkIOKey(SetFile), mkIOKey(GetNewUnit), mkIOKey(GetSize), + mkIOKey(GetIoLength), mkIOKey(GetIoMsg), mkIOKey(InquireCharacter), + mkIOKey(InquireLogical), mkIOKey(InquirePendingId), + mkIOKey(InquireInteger64), mkIOKey(EndIoStatement)> newIOTable; } // namespace Fortran::lower @@ -599,6 +599,9 @@ mlir::Value genIOOption( case Fortran::parser::ConnectSpec::CharExpr::Kind::Sign: ioFunc = getIORuntimeFunc(loc, builder); break; + case Fortran::parser::ConnectSpec::CharExpr::Kind::Carriagecontrol: + ioFunc = getIORuntimeFunc(loc, builder); + break; case Fortran::parser::ConnectSpec::CharExpr::Kind::Convert: llvm_unreachable("CONVERT not part of the runtime::io interface"); case Fortran::parser::ConnectSpec::CharExpr::Kind::Dispose: diff --git a/flang/lib/Parser/debug-parser.cpp b/flang/lib/Parser/debug-parser.cpp index dbcc64f14bb1d..af5da091cde63 100644 --- a/flang/lib/Parser/debug-parser.cpp +++ b/flang/lib/Parser/debug-parser.cpp @@ -18,9 +18,9 @@ std::optional DebugParser::Parse(ParseState &state) const { std::string note{str_, length_}; Message message{state.GetLocation(), "parser debug: 
%s"_en_US, note}; message.SetContext(state.context().get()); - message.Emit(*out, ustate->cooked(), true); + message.Emit(*out, ustate->allCooked(), true); } } - return {Success{}}; + return Success{}; } } // namespace Fortran::parser diff --git a/flang/lib/Parser/instrumented-parser.cpp b/flang/lib/Parser/instrumented-parser.cpp index 765d292193958..6687aa1bbe542 100644 --- a/flang/lib/Parser/instrumented-parser.cpp +++ b/flang/lib/Parser/instrumented-parser.cpp @@ -63,14 +63,15 @@ void ParsingLog::Note(const char *at, const MessageFixedText &tag, bool pass, } } -void ParsingLog::Dump(llvm::raw_ostream &o, const CookedSource &cooked) const { +void ParsingLog::Dump( + llvm::raw_ostream &o, const AllCookedSources &allCooked) const { for (const auto &posLog : perPos_) { const char *at{reinterpret_cast(posLog.first)}; for (const auto &tagLog : posLog.second.perTag) { - Message{at, tagLog.first}.Emit(o, cooked, true); + Message{at, tagLog.first}.Emit(o, allCooked, true); auto &entry{tagLog.second}; o << " " << (entry.pass ? 
"pass" : "fail") << " " << entry.count << '\n'; - entry.messages.Emit(o, cooked); + entry.messages.Emit(o, allCooked); } } } diff --git a/flang/lib/Parser/io-parsers.cpp b/flang/lib/Parser/io-parsers.cpp index 30f6db172c749..3615501a98edc 100644 --- a/flang/lib/Parser/io-parsers.cpp +++ b/flang/lib/Parser/io-parsers.cpp @@ -54,8 +54,9 @@ constexpr auto fileNameExpr{scalarDefaultCharExpr}; // POSITION = scalar-default-char-expr | RECL = scalar-int-expr | // ROUND = scalar-default-char-expr | SIGN = scalar-default-char-expr | // STATUS = scalar-default-char-expr -// @ | CONVERT = scalar-default-char-variable -// @ | DISPOSE = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable +// | DISPOSE = scalar-default-char-variable constexpr auto statusExpr{construct(scalarDefaultCharExpr)}; constexpr auto errLabel{construct(label)}; @@ -107,6 +108,10 @@ TYPE_PARSER(first(construct(maybe("UNIT ="_tok) >> fileUnitNumber), "SIGN =" >> pure(ConnectSpec::CharExpr::Kind::Sign), scalarDefaultCharExpr)), construct("STATUS =" >> statusExpr), + extension(construct( + construct("CARRIAGECONTROL =" >> + pure(ConnectSpec::CharExpr::Kind::Carriagecontrol), + scalarDefaultCharExpr))), extension( construct(construct( "CONVERT =" >> pure(ConnectSpec::CharExpr::Kind::Convert), @@ -357,7 +362,8 @@ TYPE_CONTEXT_PARSER("FLUSH statement"_en_US, // STREAM = scalar-default-char-variable | // STATUS = scalar-default-char-variable | // WRITE = scalar-default-char-variable -// @ | CONVERT = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable // | DISPOSE = scalar-default-char-variable TYPE_PARSER(first(construct(maybe("UNIT ="_tok) >> fileUnitNumber), construct("FILE =" >> fileNameExpr), @@ -475,6 +481,11 @@ TYPE_PARSER(first(construct(maybe("UNIT ="_tok) >> fileUnitNumber), construct("WRITE =" >> construct(pure(InquireSpec::CharVar::Kind::Write), 
scalarDefaultCharVariable)), + extension( + construct("CARRIAGECONTROL =" >> + construct( + pure(InquireSpec::CharVar::Kind::Carriagecontrol), + scalarDefaultCharVariable))), extension(construct( "CONVERT =" >> construct( pure(InquireSpec::CharVar::Kind::Convert), diff --git a/flang/lib/Parser/message.cpp b/flang/lib/Parser/message.cpp index 87594d64a8c11..6819ee4d83b2f 100644 --- a/flang/lib/Parser/message.cpp +++ b/flang/lib/Parser/message.cpp @@ -165,43 +165,43 @@ std::string Message::ToString() const { text_); } -void Message::ResolveProvenances(const CookedSource &cooked) { +void Message::ResolveProvenances(const AllCookedSources &allCooked) { if (CharBlock * cb{std::get_if(&location_)}) { if (std::optional resolved{ - cooked.GetProvenanceRange(*cb)}) { + allCooked.GetProvenanceRange(*cb)}) { location_ = *resolved; } } if (Message * attachment{attachment_.get()}) { - attachment->ResolveProvenances(cooked); + attachment->ResolveProvenances(allCooked); } } std::optional Message::GetProvenanceRange( - const CookedSource &cooked) const { + const AllCookedSources &allCooked) const { return std::visit( common::visitors{ - [&](CharBlock cb) { return cooked.GetProvenanceRange(cb); }, + [&](CharBlock cb) { return allCooked.GetProvenanceRange(cb); }, [](const ProvenanceRange &pr) { return std::make_optional(pr); }, }, location_); } -void Message::Emit(llvm::raw_ostream &o, const CookedSource &cooked, +void Message::Emit(llvm::raw_ostream &o, const AllCookedSources &allCooked, bool echoSourceLine) const { - std::optional provenanceRange{GetProvenanceRange(cooked)}; + std::optional provenanceRange{GetProvenanceRange(allCooked)}; std::string text; if (IsFatal()) { text += "error: "; } text += ToString(); - const AllSources &sources{cooked.allSources()}; + const AllSources &sources{allCooked.allSources()}; sources.EmitMessage(o, provenanceRange, text, echoSourceLine); if (attachmentIsContext_) { for (const Message *context{attachment_.get()}; context; context = 
context->attachment_.get()) { std::optional contextProvenance{ - context->GetProvenanceRange(cooked)}; + context->GetProvenanceRange(allCooked)}; text = "in the context: "; text += context->ToString(); // TODO: don't echo the source lines of a context when it's the @@ -213,7 +213,7 @@ void Message::Emit(llvm::raw_ostream &o, const CookedSource &cooked, } else { for (const Message *attachment{attachment_.get()}; attachment; attachment = attachment->attachment_.get()) { - sources.EmitMessage(o, attachment->GetProvenanceRange(cooked), + sources.EmitMessage(o, attachment->GetProvenanceRange(allCooked), attachment->ToString(), echoSourceLine); } } @@ -300,13 +300,13 @@ void Messages::Copy(const Messages &that) { } } -void Messages::ResolveProvenances(const CookedSource &cooked) { +void Messages::ResolveProvenances(const AllCookedSources &allCooked) { for (Message &m : messages_) { - m.ResolveProvenances(cooked); + m.ResolveProvenances(allCooked); } } -void Messages::Emit(llvm::raw_ostream &o, const CookedSource &cooked, +void Messages::Emit(llvm::raw_ostream &o, const AllCookedSources &allCooked, bool echoSourceLines) const { std::vector sorted; for (const auto &msg : messages_) { @@ -315,7 +315,7 @@ void Messages::Emit(llvm::raw_ostream &o, const CookedSource &cooked, std::stable_sort(sorted.begin(), sorted.end(), [](const Message *x, const Message *y) { return x->SortBefore(*y); }); for (const Message *msg : sorted) { - msg->Emit(o, cooked, echoSourceLines); + msg->Emit(o, allCooked, echoSourceLines); } } diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index b77242ae08769..819f3cf99867a 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -17,12 +17,12 @@ namespace Fortran::parser { -Parsing::Parsing(AllSources &s) : cooked_{s} {} +Parsing::Parsing(AllCookedSources &allCooked) : allCooked_{allCooked} {} Parsing::~Parsing() {} const SourceFile *Parsing::Prescan(const std::string &path, Options options) { options_ = 
options; - AllSources &allSources{cooked_.allSources()}; + AllSources &allSources{allCooked_.allSources()}; if (options.isModuleFile) { for (const auto &path : options.searchDirectories) { allSources.PushSearchPathDirectory(path); @@ -63,7 +63,9 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { preprocessor.Undefine(predef.first); } } - Prescanner prescanner{messages_, cooked_, preprocessor, options.features}; + currentCooked_ = &allCooked_.NewCookedSource(); + Prescanner prescanner{ + messages_, *currentCooked_, allSources, preprocessor, options.features}; prescanner.set_fixedForm(options.isFixedForm) .set_fixedFormColumnLimit(options.fixedFormColumns) .AddCompilerDirectiveSentinel("dir$"); @@ -77,21 +79,21 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { ProvenanceRange range{allSources.AddIncludedFile( *sourceFile, ProvenanceRange{}, options.isModuleFile)}; prescanner.Prescan(range); - if (cooked_.BufferedBytes() == 0 && !options.isModuleFile) { + if (currentCooked_->BufferedBytes() == 0 && !options.isModuleFile) { // Input is empty. Append a newline so that any warning // message about nonstandard usage will have provenance. 
- cooked_.Put('\n', range.start()); + currentCooked_->Put('\n', range.start()); } - cooked_.Marshal(); + currentCooked_->Marshal(allSources); if (options.needProvenanceRangeToCharBlockMappings) { - cooked_.CompileProvenanceRangeToOffsetMappings(); + currentCooked_->CompileProvenanceRangeToOffsetMappings(allSources); } return sourceFile; } void Parsing::DumpCookedChars(llvm::raw_ostream &out) const { - UserState userState{cooked_, common::LanguageFeatureControl{}}; - ParseState parseState{cooked_}; + UserState userState{allCooked_, common::LanguageFeatureControl{}}; + ParseState parseState{cooked()}; parseState.set_inFixedForm(options_.isFixedForm).set_userState(&userState); while (std::optional p{parseState.GetNextChar()}) { out << **p; @@ -99,19 +101,19 @@ void Parsing::DumpCookedChars(llvm::raw_ostream &out) const { } void Parsing::DumpProvenance(llvm::raw_ostream &out) const { - cooked_.Dump(out); + allCooked_.Dump(out); } void Parsing::DumpParsingLog(llvm::raw_ostream &out) const { - log_.Dump(out, cooked_); + log_.Dump(out, allCooked_); } void Parsing::Parse(llvm::raw_ostream &out) { - UserState userState{cooked_, options_.features}; + UserState userState{allCooked_, options_.features}; userState.set_debugOutput(out) .set_instrumentedParse(options_.instrumentedParse) .set_log(&log_); - ParseState parseState{cooked_}; + ParseState parseState{cooked()}; parseState.set_inFixedForm(options_.isFixedForm).set_userState(&userState); parseTree_ = program.Parse(parseState); CHECK( diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 5e6f13797646b..8e8e57c1334d9 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -26,14 +26,16 @@ using common::LanguageFeature; static constexpr int maxPrescannerNesting{100}; Prescanner::Prescanner(Messages &messages, CookedSource &cooked, - Preprocessor &preprocessor, common::LanguageFeatureControl lfc) - : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, - 
features_{lfc}, encoding_{cooked.allSources().encoding()} {} + AllSources &allSources, Preprocessor &preprocessor, + common::LanguageFeatureControl lfc) + : messages_{messages}, cooked_{cooked}, allSources_{allSources}, + preprocessor_{preprocessor}, features_{lfc}, + encoding_{allSources_.encoding()} {} Prescanner::Prescanner(const Prescanner &that) : messages_{that.messages_}, cooked_{that.cooked_}, - preprocessor_{that.preprocessor_}, features_{that.features_}, - inFixedForm_{that.inFixedForm_}, + allSources_{that.allSources_}, preprocessor_{that.preprocessor_}, + features_{that.features_}, inFixedForm_{that.inFixedForm_}, fixedFormColumnLimit_{that.fixedFormColumnLimit_}, encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ + 1}, @@ -59,10 +61,10 @@ static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { } void Prescanner::Prescan(ProvenanceRange range) { - AllSources &allSources{cooked_.allSources()}; startProvenance_ = range.start(); std::size_t offset{0}; - const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)}; + const SourceFile *source{ + allSources_.GetSourceFile(startProvenance_, &offset)}; CHECK(source); start_ = source->content().data() + offset; limit_ = start_ + range.size(); @@ -84,7 +86,7 @@ void Prescanner::Prescan(ProvenanceRange range) { dir += "free"; } dir += '\n'; - TokenSequence tokens{dir, allSources.AddCompilerInsertion(dir).start()}; + TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; tokens.Emit(cooked_); } } @@ -761,14 +763,13 @@ void Prescanner::FortranInclude(const char *firstQuote) { std::string buf; llvm::raw_string_ostream error{buf}; Provenance provenance{GetProvenance(nextLine_)}; - AllSources &allSources{cooked_.allSources()}; - const SourceFile *currentFile{allSources.GetSourceFile(provenance)}; + const SourceFile *currentFile{allSources_.GetSourceFile(provenance)}; if (currentFile) { - 
allSources.PushSearchPathDirectory(DirectoryName(currentFile->path())); + allSources_.PushSearchPathDirectory(DirectoryName(currentFile->path())); } - const SourceFile *included{allSources.Open(path, error)}; + const SourceFile *included{allSources_.Open(path, error)}; if (currentFile) { - allSources.PopSearchPathDirectory(); + allSources_.PopSearchPathDirectory(); } if (!included) { Say(provenance, "INCLUDE: %s"_err_en_US, error.str()); @@ -776,7 +777,7 @@ void Prescanner::FortranInclude(const char *firstQuote) { ProvenanceRange includeLineRange{ provenance, static_cast(p - nextLine_)}; ProvenanceRange fileRange{ - allSources.AddIncludedFile(*included, includeLineRange)}; + allSources_.AddIncludedFile(*included, includeLineRange)}; Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange); } } diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index 0b5b64792004a..ab56ed455040b 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -33,7 +33,7 @@ class Preprocessor; class Prescanner { public: - Prescanner(Messages &, CookedSource &, Preprocessor &, + Prescanner(Messages &, CookedSource &, AllSources &, Preprocessor &, common::LanguageFeatureControl); Prescanner(const Prescanner &); @@ -65,10 +65,7 @@ class Prescanner { Provenance GetCurrentProvenance() const { return GetProvenance(at_); } template Message &Say(A &&...a) { - Message &m{messages_.Say(std::forward(a)...)}; - std::optional range{m.GetProvenanceRange(cooked_)}; - CHECK(!range || cooked_.IsValid(*range)); - return m; + return messages_.Say(std::forward(a)...); } private: @@ -124,7 +121,7 @@ class Prescanner { } void EmitInsertedChar(TokenSequence &tokens, char ch) { - Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)}; + Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; tokens.PutNextTokenChar(ch, provenance); } @@ -184,6 +181,7 @@ class Prescanner { Messages &messages_; CookedSource &cooked_; + AllSources 
&allSources_; Preprocessor &preprocessor_; common::LanguageFeatureControl features_; bool inFixedForm_{false}; @@ -222,9 +220,9 @@ class Prescanner { bool skipLeadingAmpersand_{false}; const Provenance spaceProvenance_{ - cooked_.allSources().CompilerInsertionProvenance(' ')}; + allSources_.CompilerInsertionProvenance(' ')}; const Provenance backslashProvenance_{ - cooked_.allSources().CompilerInsertionProvenance('\\')}; + allSources_.CompilerInsertionProvenance('\\')}; // To avoid probing the set of active compiler directive sentinel strings // on every comment line, they're checked first with a cheap Bloom filter. diff --git a/flang/lib/Parser/provenance.cpp b/flang/lib/Parser/provenance.cpp index 73e0f7154b6b1..bcb871bd7cb41 100644 --- a/flang/lib/Parser/provenance.cpp +++ b/flang/lib/Parser/provenance.cpp @@ -400,12 +400,9 @@ const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const { return origin_[low]; } -CookedSource::CookedSource(AllSources &s) : allSources_{s} {} -CookedSource::~CookedSource() {} - std::optional CookedSource::GetProvenanceRange( CharBlock cookedRange) const { - if (!IsValid(cookedRange)) { + if (!Contains(cookedRange)) { return std::nullopt; } ProvenanceRange first{provenanceMap_.Map(cookedRange.begin() - &data_[0])}; @@ -416,34 +413,6 @@ std::optional CookedSource::GetProvenanceRange( return {ProvenanceRange{first.start(), last.start() - first.start()}}; } -std::optional CookedSource::GetCharBlockFromLineAndColumns( - int line, int startColumn, int endColumn) const { - // 2nd column is exclusive, meaning it is target column + 1. 
- CHECK(line > 0 && startColumn > 0 && endColumn > 0); - CHECK(startColumn < endColumn); - auto provenanceStart{allSources_.GetFirstFileProvenance().value().start()}; - if (auto sourceFile{allSources_.GetSourceFile(provenanceStart)}) { - CHECK(line <= static_cast(sourceFile->lines())); - return GetCharBlock(ProvenanceRange(sourceFile->GetLineStartOffset(line) + - provenanceStart.offset() + startColumn - 1, - endColumn - startColumn)); - } - return std::nullopt; -} - -std::optional> -CookedSource::GetSourcePositionRange(CharBlock cookedRange) const { - if (auto range{GetProvenanceRange(cookedRange)}) { - if (auto firstOffset{allSources_.GetSourcePosition(range->start())}) { - if (auto secondOffset{ - allSources_.GetSourcePosition(range->start() + range->size())}) { - return std::pair{*firstOffset, *secondOffset}; - } - } - } - return std::nullopt; -} - std::optional CookedSource::GetCharBlock( ProvenanceRange range) const { CHECK(!invertedMap_.empty() && @@ -457,16 +426,17 @@ std::optional CookedSource::GetCharBlock( std::size_t CookedSource::BufferedBytes() const { return buffer_.bytes(); } -void CookedSource::Marshal() { +void CookedSource::Marshal(AllSources &allSources) { CHECK(provenanceMap_.SizeInBytes() == buffer_.bytes()); - provenanceMap_.Put(allSources_.AddCompilerInsertion("(after end of source)")); + provenanceMap_.Put(allSources.AddCompilerInsertion("(after end of source)")); data_ = buffer_.Marshal(); buffer_.clear(); } -void CookedSource::CompileProvenanceRangeToOffsetMappings() { +void CookedSource::CompileProvenanceRangeToOffsetMappings( + AllSources &allSources) { if (invertedMap_.empty()) { - invertedMap_ = provenanceMap_.Invert(allSources_); + invertedMap_ = provenanceMap_.Invert(allSources); } } @@ -534,12 +504,73 @@ llvm::raw_ostream &AllSources::Dump(llvm::raw_ostream &o) const { } llvm::raw_ostream &CookedSource::Dump(llvm::raw_ostream &o) const { - o << "CookedSource:\n"; - allSources_.Dump(o); o << "CookedSource::provenanceMap_:\n"; 
provenanceMap_.Dump(o); o << "CookedSource::invertedMap_:\n"; invertedMap_.Dump(o); return o; } + +AllCookedSources::AllCookedSources(AllSources &s) : allSources_{s} {} +AllCookedSources::~AllCookedSources() {} + +CookedSource &AllCookedSources::NewCookedSource() { + return cooked_.emplace_back(); +} + +std::optional AllCookedSources::GetProvenanceRange( + CharBlock cb) const { + if (const CookedSource * c{Find(cb)}) { + return c->GetProvenanceRange(cb); + } else { + return std::nullopt; + } +} + +std::optional AllCookedSources::GetCharBlockFromLineAndColumns( + int line, int startColumn, int endColumn) const { + // 2nd column is exclusive, meaning it is target column + 1. + CHECK(line > 0 && startColumn > 0 && endColumn > 0); + CHECK(startColumn < endColumn); + auto provenanceStart{allSources_.GetFirstFileProvenance().value().start()}; + if (auto sourceFile{allSources_.GetSourceFile(provenanceStart)}) { + CHECK(line <= static_cast(sourceFile->lines())); + return GetCharBlock(ProvenanceRange(sourceFile->GetLineStartOffset(line) + + provenanceStart.offset() + startColumn - 1, + endColumn - startColumn)); + } + return std::nullopt; +} + +std::optional> +AllCookedSources::GetSourcePositionRange(CharBlock cookedRange) const { + if (auto range{GetProvenanceRange(cookedRange)}) { + if (auto firstOffset{allSources_.GetSourcePosition(range->start())}) { + if (auto secondOffset{ + allSources_.GetSourcePosition(range->start() + range->size())}) { + return std::pair{*firstOffset, *secondOffset}; + } + } + } + return std::nullopt; +} + +std::optional AllCookedSources::GetCharBlock( + ProvenanceRange range) const { + for (const auto &c : cooked_) { + if (auto result{c.GetCharBlock(range)}) { + return result; + } + } + return nullptr; +} + +void AllCookedSources::Dump(llvm::raw_ostream &o) const { + o << "AllSources:\n"; + allSources_.Dump(o); + for (const auto &c : cooked_) { + c.Dump(o); + } +} + } // namespace Fortran::parser diff --git a/flang/lib/Semantics/check-io.cpp 
b/flang/lib/Semantics/check-io.cpp index d00f56c38042d..26702f6c48bf9 100644 --- a/flang/lib/Semantics/check-io.cpp +++ b/flang/lib/Semantics/check-io.cpp @@ -135,6 +135,9 @@ void IoChecker::Enter(const parser::ConnectSpec::CharExpr &spec) { case ParseKind::Sign: specKind = IoSpecKind::Sign; break; + case ParseKind::Carriagecontrol: + specKind = IoSpecKind::Carriagecontrol; + break; case ParseKind::Convert: specKind = IoSpecKind::Convert; break; @@ -152,6 +155,13 @@ void IoChecker::Enter(const parser::ConnectSpec::CharExpr &spec) { flags_.set(Flag::AccessStream, s == "STREAM"); } CheckStringValue(specKind, *charConst, parser::FindSourceLocation(spec)); + if (specKind == IoSpecKind::Carriagecontrol && + (s == "FORTRAN" || s == "NONE")) { + context_.Say(parser::FindSourceLocation(spec), + "Unimplemented %s value '%s'"_err_en_US, + parser::ToUpperCaseLetters(common::EnumToString(specKind)), + *charConst); + } } } @@ -378,6 +388,9 @@ void IoChecker::Enter(const parser::InquireSpec::CharVar &spec) { case ParseKind::Write: specKind = IoSpecKind::Write; break; + case ParseKind::Carriagecontrol: + specKind = IoSpecKind::Carriagecontrol; + break; case ParseKind::Convert: specKind = IoSpecKind::Convert; break; @@ -821,6 +834,7 @@ void IoChecker::CheckStringValue(IoSpecKind specKind, const std::string &value, {IoSpecKind::Status, // Open values; Close values are {"DELETE", "KEEP"}. 
{"NEW", "OLD", "REPLACE", "SCRATCH", "UNKNOWN"}}, + {IoSpecKind::Carriagecontrol, {"LIST", "FORTRAN", "NONE"}}, {IoSpecKind::Convert, {"BIG_ENDIAN", "LITTLE_ENDIAN", "NATIVE"}}, {IoSpecKind::Dispose, {"DELETE", "KEEP"}}, }; diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 6fa59f0a82a08..ef62a94b1b89e 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -751,7 +751,7 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) { return it->second->scope(); } } - parser::Parsing parsing{context_.allSources()}; + parser::Parsing parsing{context_.allCookedSources()}; parser::Options options; options.isModuleFile = true; options.features.Enable(common::LanguageFeature::BackslashEscapes); @@ -796,7 +796,6 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) { } auto &modSymbol{*it->second}; modSymbol.set(Symbol::Flag::ModFile); - modSymbol.scope()->set_chars(parsing.cooked()); return modSymbol.scope(); } diff --git a/flang/lib/Semantics/scope.cpp b/flang/lib/Semantics/scope.cpp index a2a9e1dbe9e73..c7635c0b1a3bb 100644 --- a/flang/lib/Semantics/scope.cpp +++ b/flang/lib/Semantics/scope.cpp @@ -217,14 +217,6 @@ DeclTypeSpec &Scope::MakeDerivedType( return declTypeSpecs_.emplace_back(category, std::move(spec)); } -void Scope::set_chars(parser::CookedSource &cooked) { - CHECK(kind_ == Kind::Module); - CHECK(parent_.IsGlobal() || parent_.IsModuleFile()); - CHECK(DEREF(symbol_).test(Symbol::Flag::ModFile)); - // TODO: Preserve the CookedSource rather than acquiring its string. 
- chars_ = cooked.AcquireData(); -} - Scope::ImportKind Scope::GetImportKind() const { if (importKind_) { return *importKind_; diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 24d0baa9c2ae8..b5b7802c22a1a 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -45,17 +45,17 @@ namespace Fortran::semantics { -using NameToSymbolMap = std::map; +using NameToSymbolMap = std::multimap; static void DoDumpSymbols(llvm::raw_ostream &, const Scope &, int indent = 0); static void PutIndent(llvm::raw_ostream &, int indent); static void GetSymbolNames(const Scope &scope, NameToSymbolMap &symbols) { // Finds all symbol names in the scope without collecting duplicates. for (const auto &pair : scope) { - symbols.emplace(pair.second->name().begin(), *pair.second); + symbols.emplace(pair.second->name(), *pair.second); } for (const auto &pair : scope.commonBlocks()) { - symbols.emplace(pair.second->name().begin(), *pair.second); + symbols.emplace(pair.second->name(), *pair.second); } for (const auto &child : scope.children()) { GetSymbolNames(child, symbols); @@ -181,9 +181,9 @@ static bool PerformStatementSemantics( SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, - parser::AllSources &allSources) + parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allSources_{allSources}, + allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, foldingContext_{ parser::ContextualMessages{&messages_}, defaultKinds_, intrinsics_} {} @@ -351,7 +351,7 @@ bool Semantics::Perform() { } void Semantics::EmitMessages(llvm::raw_ostream &os) const { - context_.messages().Emit(os, cooked_); + context_.messages().Emit(os, context_.allCookedSources()); } void Semantics::DumpSymbols(llvm::raw_ostream &os) { @@ -361,9 +361,10 @@ 
void Semantics::DumpSymbols(llvm::raw_ostream &os) { void Semantics::DumpSymbolsSources(llvm::raw_ostream &os) const { NameToSymbolMap symbols; GetSymbolNames(context_.globalScope(), symbols); + const parser::AllCookedSources &allCooked{context_.allCookedSources()}; for (const auto &pair : symbols) { const Symbol &symbol{pair.second}; - if (auto sourceInfo{cooked_.GetSourcePositionRange(symbol.name())}) { + if (auto sourceInfo{allCooked.GetSourcePositionRange(symbol.name())}) { os << symbol.name().ToString() << ": " << sourceInfo->first.file.path() << ", " << sourceInfo->first.line << ", " << sourceInfo->first.column << "-" << sourceInfo->second.column << "\n"; diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 30f343773f90d..18c3f8241f08f 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -655,6 +655,31 @@ bool IONAME(SetAsynchronous)( } } +bool IONAME(SetCarriagecontrol)( + Cookie cookie, const char *keyword, std::size_t length) { + IoStatementState &io{*cookie}; + auto *open{io.get_if()}; + if (!open) { + io.GetIoErrorHandler().Crash( + "SetCarriageControl() called when not in an OPEN statement"); + } + static const char *keywords[]{"LIST", "FORTRAN", "NONE", nullptr}; + switch (IdentifyValue(keyword, length, keywords)) { + case 0: + return true; + case 1: + case 2: + open->SignalError(IostatErrorInKeyword, + "Unimplemented CARRIAGECONTROL='%.*s'", static_cast(length), + keyword); + return false; + default: + open->SignalError(IostatErrorInKeyword, "Invalid CARRIAGECONTROL='%.*s'", + static_cast(length), keyword); + return false; + } +} + bool IONAME(SetConvert)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; @@ -708,7 +733,7 @@ bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { auto *open{io.get_if()}; if (!open) { io.GetIoErrorHandler().Crash( - "SetEncoding() called when not in an OPEN statement"); + "SetForm() called when not in an OPEN statement"); 
} static const char *keywords[]{"FORMATTED", "UNFORMATTED", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { diff --git a/flang/runtime/io-api.h b/flang/runtime/io-api.h index a38152d6ec1c1..369013fee8bc1 100644 --- a/flang/runtime/io-api.h +++ b/flang/runtime/io-api.h @@ -260,6 +260,8 @@ bool IONAME(SetAccess)(Cookie, const char *, std::size_t); bool IONAME(SetAction)(Cookie, const char *, std::size_t); // ASYNCHRONOUS=YES, NO bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t); +// CARRIAGECONTROL=LIST, FORTRAN, NONE +bool IONAME(SetCarriagecontrol)(Cookie, const char *, std::size_t); // CONVERT=NATIVE, LITTLE_ENDIAN, BIG_ENDIAN, or SWAP bool IONAME(SetConvert)(Cookie, const char *, std::size_t); // ENCODING=UTF-8, DEFAULT diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 8300b1ea3c27b..9bf0284358b96 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -779,6 +779,9 @@ bool InquireUnitState::Inquire( : unit().modes.editingFlags & blankZero ? "ZERO" : "NULL"; break; + case HashInquiryKeyword("CARRIAGECONTROL"): + str = "LIST"; + break; case HashInquiryKeyword("CONVERT"): str = unit().swapEndianness() ? 
"SWAP" : "NATIVE"; break; @@ -976,6 +979,7 @@ bool InquireNoUnitState::Inquire( case HashInquiryKeyword("ACTION"): case HashInquiryKeyword("ASYNCHRONOUS"): case HashInquiryKeyword("BLANK"): + case HashInquiryKeyword("CARRIAGECONTROL"): case HashInquiryKeyword("CONVERT"): case HashInquiryKeyword("DECIMAL"): case HashInquiryKeyword("DELIM"): @@ -1061,6 +1065,7 @@ bool InquireUnconnectedFileState::Inquire( case HashInquiryKeyword("ACTION"): case HashInquiryKeyword("ASYNCHRONOUS"): case HashInquiryKeyword("BLANK"): + case HashInquiryKeyword("CARRIAGECONTROL"): case HashInquiryKeyword("CONVERT"): case HashInquiryKeyword("DECIMAL"): case HashInquiryKeyword("DELIM"): diff --git a/flang/test/Semantics/getsymbols01.f90 b/flang/test/Semantics/getsymbols01.f90 index 8f50304825dc7..bdb7bf053823a 100644 --- a/flang/test/Semantics/getsymbols01.f90 +++ b/flang/test/Semantics/getsymbols01.f90 @@ -16,10 +16,10 @@ recursive pure function f() result(x) end module ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! CHECK-COUNT-1:f:{{.*}}getsymbols01.f90, 12, 26-27 ! CHECK-COUNT-1:mm1:{{.*}}getsymbols01.f90, 2, 8-11 ! CHECK-COUNT-1:s:{{.*}}getsymbols01.f90, 5, 18-19 -! CHECK-COUNT-1:x:{{.*}}getsymbols01.f90, 5, 21-22 -! CHECK-COUNT-1:y:{{.*}}getsymbols01.f90, 5, 24-25 ! CHECK-COUNT-1:ss:{{.*}}getsymbols01.f90, 9, 19-21 -! CHECK-COUNT-1:f:{{.*}}getsymbols01.f90, 12, 26-27 +! CHECK-COUNT-1:x:{{.*}}getsymbols01.f90, 5, 21-22 ! CHECK-COUNT-1:x:{{.*}}getsymbols01.f90, 13, 24-25 +! CHECK-COUNT-1:y:{{.*}}getsymbols01.f90, 5, 24-25 diff --git a/flang/test/Semantics/getsymbols02.f90 b/flang/test/Semantics/getsymbols02.f90 index 4c8f0710eb23d..80b7651f029b2 100644 --- a/flang/test/Semantics/getsymbols02.f90 +++ b/flang/test/Semantics/getsymbols02.f90 @@ -10,5 +10,5 @@ PROGRAM helloworld ! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-a.f90 ! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-b.f90 ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s -! 
CHECK: get5: mm2a -! CHECK: callget5: mm2b +! CHECK: callget5: ./mm2b.mod, +! CHECK: get5: ./mm2a.mod, diff --git a/flang/test/Semantics/getsymbols03-a.f90 b/flang/test/Semantics/getsymbols03-a.f90 index c11aee03048c9..980d6bc58c1a6 100644 --- a/flang/test/Semantics/getsymbols03-a.f90 +++ b/flang/test/Semantics/getsymbols03-a.f90 @@ -8,7 +8,7 @@ program main end program ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s -! CHECK:mm3:{{.*}}getsymbols03-b.f90, 1, 8-11 ! CHECK:f:{{.*}}getsymbols03-b.f90, 2, 12-13 ! CHECK:main:{{.*}}getsymbols03-a.f90, 4, 9-13 +! CHECK:mm3:{{.*}}getsymbols03-b.f90, 1, 8-11 ! CHECK:x:{{.*}}getsymbols03-a.f90, 6, 13-14 diff --git a/flang/test/Semantics/getsymbols04.f90 b/flang/test/Semantics/getsymbols04.f90 index 4decfc78560ad..fc9b177abd903 100644 --- a/flang/test/Semantics/getsymbols04.f90 +++ b/flang/test/Semantics/getsymbols04.f90 @@ -8,5 +8,5 @@ program main ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s ! CHECK:x:{{.*}}getsymbols04.f90, 3, 14-15 -! CHECK:y:{{.*}}getsymbols04.f90, 4, 14-15 ! CHECK:x:{{.*}}getsymbols04.f90, 5, 11-12 +! CHECK:y:{{.*}}getsymbols04.f90, 4, 14-15 diff --git a/flang/test/Semantics/getsymbols05.f90 b/flang/test/Semantics/getsymbols05.f90 index 30dcb2278e002..624f37a74b763 100644 --- a/flang/test/Semantics/getsymbols05.f90 +++ b/flang/test/Semantics/getsymbols05.f90 @@ -11,5 +11,5 @@ program main ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s ! CHECK:x:{{.*}}getsymbols05.f90, 3, 14-15 -! CHECK:y:{{.*}}getsymbols05.f90, 4, 14-15 ! CHECK:x:{{.*}}getsymbols05.f90, 6, 16-17 +! CHECK:y:{{.*}}getsymbols05.f90, 4, 14-15 diff --git a/flang/test/Semantics/io01.f90 b/flang/test/Semantics/io01.f90 index 9828d4afe8921..17b68e407407e 100644 --- a/flang/test/Semantics/io01.f90 +++ b/flang/test/Semantics/io01.f90 @@ -62,6 +62,7 @@ open(81, convert=convert_(2), dispose=dispose_(2)) open(access='STREAM', 90) ! 
nonstandard + open (unit=91, file='xfile', carriagecontrol='list') ! nonstandard !ERROR: OPEN statement must have a UNIT or NEWUNIT specifier !ERROR: If ACCESS='DIRECT' appears, RECL must also appear @@ -127,4 +128,10 @@ !ERROR: If NEWUNIT appears, FILE or STATUS='SCRATCH' must also appear open(newunit=nn, status='old') + + !ERROR: Unimplemented CARRIAGECONTROL value 'fortran' + open (unit=116, file='xfile', carriagecontrol='fortran') ! nonstandard + + !ERROR: Invalid CARRIAGECONTROL value 'nonsense' + open (unit=116, file='xfile', carriagecontrol='nonsense') ! nonstandard end diff --git a/flang/test/Semantics/io05.f90 b/flang/test/Semantics/io05.f90 index ed6b77f7d4ad9..666b200ad9a3c 100644 --- a/flang/test/Semantics/io05.f90 +++ b/flang/test/Semantics/io05.f90 @@ -25,6 +25,7 @@ inquire(pending=v(5), file='abc') inquire(10, id=id, pending=v(5)) inquire(10, id=const_id, pending=v(5)) + inquire(10, carriagecontrol=c(1)) ! nonstandard ! using variable 'cv' multiple times seems to be allowed inquire(file='abc', & diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp b/flang/tools/f18-parse-demo/f18-parse-demo.cpp index 60303aa7a24ff..4ccc65e0631d9 100644 --- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp +++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp @@ -160,14 +160,15 @@ std::string CompileFortran( } options.searchDirectories = driver.searchDirectories; Fortran::parser::AllSources allSources; - Fortran::parser::Parsing parsing{allSources}; + Fortran::parser::AllCookedSources allCookedSources{allSources}; + Fortran::parser::Parsing parsing{allCookedSources}; auto start{CPUseconds()}; parsing.Prescan(path, options); if (!parsing.messages().empty() && (driver.warningsAreErrors || parsing.messages().AnyFatalError())) { llvm::errs() << driver.prefix << "could not scan " << path << '\n'; - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), parsing.allCooked()); exitStatus = EXIT_FAILURE; return {}; } @@ -191,7 +192,7 
@@ std::string CompileFortran( } parsing.ClearLog(); - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), parsing.allCooked()); if (!parsing.consumedWholeFile()) { parsing.EmitMessage(llvm::errs(), parsing.finalRestingPlace(), "parser FAIL (final position)"); diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 156c2337d0c8f..a33a167686e49 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -188,9 +188,10 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, DriverOptions &driver, const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds) { Fortran::parser::AllSources allSources; + Fortran::parser::AllCookedSources allCookedSources{allSources}; allSources.set_encoding(driver.encoding); Fortran::semantics::SemanticsContext semanticsContext{ - defaultKinds, options.features, allSources}; + defaultKinds, options.features, allCookedSources}; semanticsContext.set_moduleDirectory(driver.moduleDirectory) .set_moduleFileSuffix(driver.moduleFileSuffix) .set_searchDirectories(driver.searchDirectories) @@ -204,12 +205,12 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, } } options.searchDirectories = driver.searchDirectories; - Fortran::parser::Parsing parsing{semanticsContext.allSources()}; + Fortran::parser::Parsing parsing{allCookedSources}; parsing.Prescan(path, options); if (!parsing.messages().empty() && (driver.warningsAreErrors || parsing.messages().AnyFatalError())) { llvm::errs() << driver.prefix << "could not scan " << path << '\n'; - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), allCookedSources); exitStatus = EXIT_FAILURE; return {}; } @@ -218,7 +219,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, return {}; } if (driver.dumpCookedChars) { - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + 
parsing.messages().Emit(llvm::errs(), allCookedSources); parsing.DumpCookedChars(llvm::outs()); return {}; } @@ -228,7 +229,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, return {}; } parsing.ClearLog(); - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), allCookedSources); if (!parsing.consumedWholeFile()) { parsing.EmitMessage(llvm::errs(), parsing.finalRestingPlace(), "parser FAIL (final position)"); @@ -274,7 +275,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, return {}; } if (driver.getDefinition) { - if (auto cb{parsing.cooked().GetCharBlockFromLineAndColumns( + if (auto cb{allCookedSources.GetCharBlockFromLineAndColumns( driver.getDefinitionArgs.line, driver.getDefinitionArgs.startColumn, driver.getDefinitionArgs.endColumn)}) { @@ -283,7 +284,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, llvm::errs() << "Found symbol name: " << symbol->name().ToString() << "\n"; if (auto sourceInfo{ - parsing.cooked().GetSourcePositionRange(symbol->name())}) { + allCookedSources.GetSourcePositionRange(symbol->name())}) { llvm::outs() << symbol->name().ToString() << ": " << sourceInfo->first.file.path() << ", " << sourceInfo->first.line << ", " diff --git a/flang/unittests/Evaluate/intrinsics.cpp b/flang/unittests/Evaluate/intrinsics.cpp index 3b9805946286d..4f2a21dfe6048 100644 --- a/flang/unittests/Evaluate/intrinsics.cpp +++ b/flang/unittests/Evaluate/intrinsics.cpp @@ -22,9 +22,9 @@ class CookedStrings { } void Save(const std::string &s) { offsets_[s] = cooked_.Put(s); - cooked_.PutProvenance(cooked_.allSources().AddCompilerInsertion(s)); + cooked_.PutProvenance(allSources_.AddCompilerInsertion(s)); } - void Marshal() { cooked_.Marshal(); } + void Marshal() { cooked_.Marshal(allSources_); } parser::CharBlock operator()(const std::string &s) { return {cooked_.data().data() + offsets_[s], s.size()}; } @@ -32,12 +32,13 
@@ class CookedStrings { return parser::ContextualMessages{cooked_.data(), &buffer}; } void Emit(llvm::raw_ostream &o, const parser::Messages &messages) { - messages.Emit(o, cooked_); + messages.Emit(o, allCookedSources_); } private: parser::AllSources allSources_; - parser::CookedSource cooked_{allSources_}; + parser::AllCookedSources allCookedSources_{allSources_}; + parser::CookedSource &cooked_{allCookedSources_.NewCookedSource()}; std::map offsets_; }; diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 063fe401da8b5..33ae64c0a08cb 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -199,6 +199,12 @@ def MathAPI : PublicAPI<"math.h"> { "modfl", "expf", "exp2f", + "remainderf", + "remainder", + "remainderl", + "remquof", + "remquo", + "remquol", "round", "roundf", "roundl", diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index c24173b1d0e77..6aca5e400d68a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -103,6 +103,12 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.modf libc.src.math.modff libc.src.math.modfl + libc.src.math.remainderf + libc.src.math.remainder + libc.src.math.remainderl + libc.src.math.remquof + libc.src.math.remquo + libc.src.math.remquol libc.src.math.round libc.src.math.roundf libc.src.math.roundl diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 15fc12d375e63..77fa971adc614 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -310,6 +310,14 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"expf", RetValSpec, [ArgSpec]>, FunctionSpec<"exp2f", RetValSpec, [ArgSpec]>, + FunctionSpec<"remainderf", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"remainder", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"remainderl", RetValSpec, [ArgSpec, ArgSpec]>, + + FunctionSpec<"remquof", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + FunctionSpec<"remquo", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + 
FunctionSpec<"remquol", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + FunctionSpec<"round", RetValSpec, [ArgSpec]>, FunctionSpec<"roundf", RetValSpec, [ArgSpec]>, FunctionSpec<"roundl", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 0c878de2ac95d..3b4f821726576 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -521,3 +521,75 @@ add_entrypoint_object( COMPILE_OPTIONS -O2 ) + +add_entrypoint_object( + remquof + SRCS + remquof.cpp + HDRS + remquof.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remquo + SRCS + remquo.cpp + HDRS + remquo.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remquol + SRCS + remquol.cpp + HDRS + remquol.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remainderf + SRCS + remainderf.cpp + HDRS + remainderf.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remainder + SRCS + remainder.cpp + HDRS + remainder.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remainderl + SRCS + remainderl.cpp + HDRS + remainderl.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) diff --git a/libc/src/math/remainder.cpp b/libc/src/math/remainder.cpp new file mode 100644 index 0000000000000..880e6a60f4a1d --- /dev/null +++ b/libc/src/math/remainder.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remainder function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +double LLVM_LIBC_ENTRYPOINT(remainder)(double x, double y) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remainder.h b/libc/src/math/remainder.h new file mode 100644 index 0000000000000..8a720fc23b6be --- /dev/null +++ b/libc/src/math/remainder.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remainder ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDER_H +#define LLVM_LIBC_SRC_MATH_REMAINDER_H + +namespace __llvm_libc { + +double remainder(double x, double y); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMAINDER_H diff --git a/libc/src/math/remainderf.cpp b/libc/src/math/remainderf.cpp new file mode 100644 index 0000000000000..bab320101d58c --- /dev/null +++ b/libc/src/math/remainderf.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remainderf function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +float LLVM_LIBC_ENTRYPOINT(remainderf)(float x, float y) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remainderf.h b/libc/src/math/remainderf.h new file mode 100644 index 0000000000000..19a16d08a94d3 --- /dev/null +++ b/libc/src/math/remainderf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remainderf --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDERF_H +#define LLVM_LIBC_SRC_MATH_REMAINDERF_H + +namespace __llvm_libc { + +float remainderf(float x, float y); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMAINDERF_H diff --git a/libc/src/math/remainderl.cpp b/libc/src/math/remainderl.cpp new file mode 100644 index 0000000000000..bd9bc4985d967 --- /dev/null +++ b/libc/src/math/remainderl.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remainderl function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +long double LLVM_LIBC_ENTRYPOINT(remainderl)(long double x, long double y) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remainderl.h b/libc/src/math/remainderl.h new file mode 100644 index 0000000000000..f2837635ab77c --- /dev/null +++ b/libc/src/math/remainderl.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remainderl --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDERL_H +#define LLVM_LIBC_SRC_MATH_REMAINDERL_H + +namespace __llvm_libc { + +long double remainderl(long double x, long double y); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMAINDERL_H diff --git a/libc/src/math/remquo.cpp b/libc/src/math/remquo.cpp new file mode 100644 index 0000000000000..b61d7d4d1bed5 --- /dev/null +++ b/libc/src/math/remquo.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of remquo function ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +double LLVM_LIBC_ENTRYPOINT(remquo)(double x, double y, int *exp) { + return fputil::remquo(x, y, *exp); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remquo.h b/libc/src/math/remquo.h new file mode 100644 index 0000000000000..cb753fee6ea0d --- /dev/null +++ b/libc/src/math/remquo.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remquo ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUO_H +#define LLVM_LIBC_SRC_MATH_REMQUO_H + +namespace __llvm_libc { + +double remquo(double x, double y, int *exp); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMQUO_H diff --git a/libc/src/math/remquof.cpp b/libc/src/math/remquof.cpp new file mode 100644 index 0000000000000..246bee038f116 --- /dev/null +++ b/libc/src/math/remquof.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of remquof function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +float LLVM_LIBC_ENTRYPOINT(remquof)(float x, float y, int *exp) { + return fputil::remquo(x, y, *exp); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remquof.h b/libc/src/math/remquof.h new file mode 100644 index 0000000000000..feb2e4f5e0dd5 --- /dev/null +++ b/libc/src/math/remquof.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remquof -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUOF_H +#define LLVM_LIBC_SRC_MATH_REMQUOF_H + +namespace __llvm_libc { + +float remquof(float x, float y, int *exp); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMQUOF_H diff --git a/libc/src/math/remquol.cpp b/libc/src/math/remquol.cpp new file mode 100644 index 0000000000000..8e0287682dbfe --- /dev/null +++ b/libc/src/math/remquol.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remquol function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +long double LLVM_LIBC_ENTRYPOINT(remquol)(long double x, long double y, + int *exp) { + return fputil::remquo(x, y, *exp); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remquol.h b/libc/src/math/remquol.h new file mode 100644 index 0000000000000..d1b0e20fcc865 --- /dev/null +++ b/libc/src/math/remquol.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remquol -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUOL_H +#define LLVM_LIBC_SRC_MATH_REMQUOL_H + +namespace __llvm_libc { + +long double remquol(long double x, long double y, int *exp); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMQUOL_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 07b5052074528..e1bac1a339067 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -552,3 +552,42 @@ add_fp_unittest( libc.src.math.sqrtl libc.utils.FPUtil.fputil ) + +add_fp_unittest( + remquof_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + remquof_test.cpp + DEPENDS + libc.include.math + libc.src.math.remquof + libc.utils.FPUtil.fputil +) + +add_fp_unittest( + remquo_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + remquo_test.cpp + DEPENDS + libc.include.math + libc.src.math.remquo + libc.utils.FPUtil.fputil +) + +add_fp_unittest( + remquol_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + remquol_test.cpp + DEPENDS + 
libc.include.math + libc.src.math.remquol + libc.utils.FPUtil.fputil +) diff --git a/libc/test/src/math/remquo_test.cpp b/libc/test/src/math/remquo_test.cpp new file mode 100644 index 0000000000000..0ebbed7224b22 --- /dev/null +++ b/libc/test/src/math/remquo_test.cpp @@ -0,0 +1,91 @@ +//===-- Unittests for remquo ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/math/remquo.h" +#include "utils/FPUtil/BasicOperations.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +namespace mpfr = __llvm_libc::testing::mpfr; + +static const double zero = FPBits::zero(); +static const double negZero = FPBits::negZero(); +static const double nan = FPBits::buildNaN(1); +static const double inf = FPBits::inf(); +static const double negInf = FPBits::negInf(); + +TEST(RemquoTest, SpecialNumbers) { + int exponent; + double x, y; + + y = 1.0; + x = inf; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + x = negInf; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + x = 1.0; + y = zero; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + y = negZero; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + y = nan; + x = 1.0; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + y = 1.0; + x = nan; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + x = nan; + y = nan; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + x = zero; + y = 1.0; + EXPECT_FP_EQ(__llvm_libc::remquo(x, y, 
&exponent), zero); + + x = negZero; + y = 1.0; + EXPECT_FP_EQ(__llvm_libc::remquo(x, y, &exponent), negZero); +} + +TEST(RemquoTest, SubnormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = + (FPBits::maxSubnormal - FPBits::minSubnormal) / count; + for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal; + v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal; + v += step, w -= step) { + double x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquo(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} + +TEST(RemquoTest, NormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count; + for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal; + v <= FPBits::maxNormal && w >= FPBits::minNormal; v += step, w -= step) { + double x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquo(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} diff --git a/libc/test/src/math/remquof_test.cpp b/libc/test/src/math/remquof_test.cpp new file mode 100644 index 0000000000000..0c51d5f5324df --- /dev/null +++ b/libc/test/src/math/remquof_test.cpp @@ -0,0 +1,91 @@ +//===-- Unittests for remquof ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/math/remquof.h" +#include "utils/FPUtil/BasicOperations.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +namespace mpfr = __llvm_libc::testing::mpfr; + +static const float zero = FPBits::zero(); +static const float negZero = FPBits::negZero(); +static const float nan = FPBits::buildNaN(1); +static const float inf = FPBits::inf(); +static const float negInf = FPBits::negInf(); + +TEST(RemquofTest, SpecialNumbers) { + int exponent; + float x, y; + + y = 1.0f; + x = inf; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + x = negInf; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + x = 1.0f; + y = zero; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + y = negZero; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + y = nan; + x = 1.0f; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + y = 1.0f; + x = nan; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + x = nan; + y = nan; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + x = zero; + y = 1.0f; + EXPECT_FP_EQ(__llvm_libc::remquof(x, y, &exponent), zero); + + x = negZero; + y = 1.0f; + EXPECT_FP_EQ(__llvm_libc::remquof(x, y, &exponent), negZero); +} + +TEST(RemquofTest, SubnormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = + (FPBits::maxSubnormal - FPBits::minSubnormal) / count; + for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal; + v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal; + v += step, w -= step) { + float x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput 
input{x, y}; + result.f = __llvm_libc::remquof(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} + +TEST(RemquofTest, NormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count; + for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal; + v <= FPBits::maxNormal && w >= FPBits::minNormal; v += step, w -= step) { + float x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquof(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} diff --git a/libc/test/src/math/remquol_test.cpp b/libc/test/src/math/remquol_test.cpp new file mode 100644 index 0000000000000..eab3a5fb1fa6a --- /dev/null +++ b/libc/test/src/math/remquol_test.cpp @@ -0,0 +1,97 @@ +//===-- Unittests for remquol ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/math/remquol.h" +#include "utils/FPUtil/BasicOperations.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +namespace mpfr = __llvm_libc::testing::mpfr; + +static const long double zero = FPBits::zero(); +static const long double negZero = FPBits::negZero(); +static const long double nan = FPBits::buildNaN(1); +static const long double inf = FPBits::inf(); +static const long double negInf = FPBits::negInf(); + +TEST(RemquoTest, SpecialNumbers) { + int exponent; + long double x, y; + + y = 1.0l; + x = inf; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + x = negInf; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + x = 1.0l; + y = zero; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + y = negZero; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + y = nan; + x = 1.0l; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + y = 1.0l; + x = nan; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + x = nan; + y = nan; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + x = zero; + y = 1.0l; + EXPECT_FP_EQ(__llvm_libc::remquol(x, y, &exponent), zero); + + x = negZero; + y = 1.0l; + EXPECT_FP_EQ(__llvm_libc::remquol(x, y, &exponent), negZero); +} + +TEST(RemquofTest, SubnormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = + (FPBits::maxSubnormal - FPBits::minSubnormal) / count; + for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal; + v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal; + v += step, w -= step) { + long double x = FPBits(v), y = FPBits(w); + 
mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquol(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} + +TEST(RemquofTest, NormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count; + for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal; + v <= FPBits::maxNormal && w >= FPBits::minNormal; v += step, w -= step) { + long double x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + result.f = __llvm_libc::remquol(x, y, &result.i); + // In normal range on x86 platforms, the implicit 1 bit can be zero making + // the numbers NaN. Hence we test for them separately. + if (isnan(x) || isnan(y)) { + ASSERT_NE(isnan(result.f), 0); + } else { + mpfr::BinaryInput input{x, y}; + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } + } +} diff --git a/libc/utils/FPUtil/CMakeLists.txt b/libc/utils/FPUtil/CMakeLists.txt index 745ede30fedd6..8a6cc36a7b516 100644 --- a/libc/utils/FPUtil/CMakeLists.txt +++ b/libc/utils/FPUtil/CMakeLists.txt @@ -11,6 +11,7 @@ add_header_library( BasicOperations.h BitPatterns.h ClassificationFunctions.h + DivisionAndRemainderOperations.h FloatOperations.h FloatProperties.h FPBits.h diff --git a/libc/utils/FPUtil/DivisionAndRemainderOperations.h b/libc/utils/FPUtil/DivisionAndRemainderOperations.h new file mode 100644 index 0000000000000..ceae538027f3c --- /dev/null +++ b/libc/utils/FPUtil/DivisionAndRemainderOperations.h @@ -0,0 +1,111 @@ +//===-- Floating point divsion and remainder operations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UTILS_FPUTIL_DIVISION_AND_REMAINDER_OPERATIONS_H +#define LLVM_LIBC_UTILS_FPUTIL_DIVISION_AND_REMAINDER_OPERATIONS_H + +#include "FPBits.h" +#include "ManipulationFunctions.h" +#include "NormalFloat.h" + +#include "utils/CPP/TypeTraits.h" + +namespace __llvm_libc { +namespace fputil { + +static constexpr int quotientLSBBits = 3; + +// The implementation is a bit-by-bit algorithm which uses integer division +// to evaluate the quotient and remainder. +template ::Value, int> = 0> +static inline T remquo(T x, T y, int &q) { + FPBits xbits(x), ybits(y); + if (xbits.isNaN()) + return x; + if (ybits.isNaN()) + return y; + if (xbits.isInf() || ybits.isZero()) + return FPBits::buildNaN(1); + + if (xbits.isZero() || ybits.isInf()) { + q = 0; + return __llvm_libc::fputil::copysign(T(0.0), x); + } + + bool resultSign = (xbits.sign == ybits.sign ? false : true); + + // Once we know the sign of the result, we can just operate on the absolute + // values. The correct sign can be applied to the result after the result + // is evaluated. + xbits.sign = ybits.sign = 0; + + NormalFloat normalx(xbits), normaly(ybits); + int exp = normalx.exponent - normaly.exponent; + typename NormalFloat::UIntType mx = normalx.mantissa, + my = normaly.mantissa; + + q = 0; + while (exp >= 0) { + unsigned shiftCount = 0; + typename NormalFloat::UIntType n = mx; + for (shiftCount = 0; n < my; n <<= 1, ++shiftCount) + ; + + if (static_cast(shiftCount) > exp) + break; + + exp -= shiftCount; + if (0 <= exp && exp < quotientLSBBits) + q |= (1 << exp); + + mx = n - my; + if (mx == 0) + return __llvm_libc::fputil::copysign(T(0.0), x); + } + + NormalFloat remainder(exp + normaly.exponent, mx, 0); + + // Since NormalFloat to native type conversion is a truncation operation + // currently, the remainder value in the native type is correct as is. 
+ // However, if NormalFloat to native type conversion is updated in future, + // then the conversion to native remainder value should be updated + // appropriately and some directed tests added. + T nativeRemainder(remainder); + T absy = T(ybits); + int cmp = remainder.mul2(1).cmp(normaly); + if (cmp > 0) { + q = q + 1; + if (x >= T(0.0)) + nativeRemainder = nativeRemainder - absy; + else + nativeRemainder = absy - nativeRemainder; + } else if (cmp == 0) { + if (q & 1) { + q += 1; + if (x >= T(0.0)) + nativeRemainder = -nativeRemainder; + } else { + if (x < T(0.0)) + nativeRemainder = -nativeRemainder; + } + } else { + if (x < T(0.0)) + nativeRemainder = -nativeRemainder; + } + + q = resultSign ? -q : q; + if (nativeRemainder == T(0.0)) + return __llvm_libc::fputil::copysign(T(0.0), x); + return nativeRemainder; +} + +} // namespace fputil +} // namespace __llvm_libc + +#endif // LLVM_LIBC_UTILS_FPUTIL_DIVISION_AND_REMAINDER_OPERATIONS_H diff --git a/libc/utils/FPUtil/FPBits.h b/libc/utils/FPUtil/FPBits.h index 2c630dba2af76..89bdd92669b81 100644 --- a/libc/utils/FPUtil/FPBits.h +++ b/libc/utils/FPUtil/FPBits.h @@ -73,6 +73,14 @@ template struct __attribute__((packed)) FPBits { static constexpr int exponentBias = (1 << (ExponentWidth::value - 1)) - 1; static constexpr int maxExponent = (1 << ExponentWidth::value) - 1; + static constexpr UIntType minSubnormal = UIntType(1); + static constexpr UIntType maxSubnormal = + (UIntType(1) << MantissaWidth::value) - 1; + static constexpr UIntType minNormal = + (UIntType(1) << MantissaWidth::value); + static constexpr UIntType maxNormal = + ((UIntType(maxExponent) - 1) << MantissaWidth::value) | maxSubnormal; + // We don't want accidental type promotions/conversions so we require exact // type match. 
template struct __attribute__((packed)) FPBits { static constexpr int exponentBias = 0x3FFF; static constexpr int maxExponent = 0x7FFF; + static constexpr UIntType minSubnormal = UIntType(1); + // Subnormal numbers include the implicit bit in x86 long double formats. + static constexpr UIntType maxSubnormal = + (UIntType(1) << (MantissaWidth::value + 1)) - 1; + static constexpr UIntType minNormal = + (UIntType(3) << MantissaWidth::value); + static constexpr UIntType maxNormal = + ((UIntType(maxExponent) - 1) << (MantissaWidth::value + 1)) | + (UIntType(1) << MantissaWidth::value) | maxSubnormal; UIntType mantissa : MantissaWidth::value; uint8_t implicitBit : 1; diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index a6867fb30a37c..f5c6e5b8251aa 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -184,6 +184,8 @@ Status ------------------------------------------------- ----------------- ``__cpp_lib_constexpr_swap_algorithms`` *unimplemented* ------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_utility`` ``201811L`` + ------------------------------------------------- ----------------- ``__cpp_lib_destroying_delete`` ``201806L`` ------------------------------------------------- ----------------- ``__cpp_lib_endian`` ``201907L`` diff --git a/libcxx/include/__config b/libcxx/include/__config index d7b6a2acaefff..17e6bfe207aaf 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -38,7 +38,7 @@ # define _LIBCPP_ABI_VERSION 1 #endif -#ifndef __STDC_HOSTED__ +#if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING #endif @@ -398,9 +398,7 @@ #elif defined(_LIBCPP_COMPILER_CLANG) # define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) #else -// This definition is potentially buggy, but it's only taken with GCC in C++03, -// which we barely support anyway. 
See llvm.org/PR39713 -# define _LIBCPP_ALIGNOF(_Tp) __alignof(_Tp) +# error "We don't know a correct way to implement alignof(T) in C++03 outside of Clang" #endif #define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) diff --git a/libcxx/include/ctime b/libcxx/include/ctime index f9f2f1659d0e0..b0e6c65af5d5f 100644 --- a/libcxx/include/ctime +++ b/libcxx/include/ctime @@ -52,6 +52,20 @@ int timespec_get( struct timespec *ts, int base); // C++17 #pragma GCC system_header #endif +// FIXME: +// Apple SDKs don't define ::timespec_get unconditionally in C++ mode. This +// should be fixed in future SDKs, but for the time being we need to avoid +// trying to use that declaration when the SDK doesn't provide it. Note that +// we're detecting this here instead of in <__config> because we can't include +// system headers from <__config>, since it leads to circular module dependencies. +// This is also meant to be a very temporary workaround until the SDKs are fixed. +#if defined(__APPLE__) +# include +# if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL) +# define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED +# endif +#endif + _LIBCPP_BEGIN_NAMESPACE_STD using ::clock_t; @@ -72,7 +86,7 @@ using ::gmtime; using ::localtime; #endif using ::strftime; -#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_TIMESPEC_GET) +#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_TIMESPEC_GET) && !defined(_LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED) using ::timespec_get; #endif diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 1f80b70759c78..c3c7db5ff1184 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -1393,7 +1393,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator, _Alloc> template template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_T1, _T2>::pair(piecewise_construct_t, tuple<_Args1...>& __first_args, tuple<_Args2...>& __second_args, __tuple_indices<_I1...>, 
__tuple_indices<_I2...>) diff --git a/libcxx/include/utility b/libcxx/include/utility index 7ac322bfe7102..13489de22c95a 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -499,7 +499,7 @@ struct _LIBCPP_TEMPLATE_VIS pair second(_VSTD::get<1>(_VSTD::forward<_Tuple>(__p))) {} template - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair(piecewise_construct_t __pc, tuple<_Args1...> __first_args, tuple<_Args2...> __second_args) _NOEXCEPT_((is_nothrow_constructible::value && @@ -508,7 +508,7 @@ struct _LIBCPP_TEMPLATE_VIS pair typename __make_tuple_indices::type(), typename __make_tuple_indices::type()) {} - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair& operator=(typename conditional< is_copy_assignable::value && is_copy_assignable::value, @@ -521,7 +521,7 @@ struct _LIBCPP_TEMPLATE_VIS pair return *this; } - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair& operator=(typename conditional< is_move_assignable::value && is_move_assignable::value, @@ -537,7 +537,7 @@ struct _LIBCPP_TEMPLATE_VIS pair template ::template __enable_assign<_Tuple>() > = false> - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair& operator=(_Tuple&& __p) { first = _VSTD::get<0>(_VSTD::forward<_Tuple>(__p)); second = _VSTD::get<1>(_VSTD::forward<_Tuple>(__p)); @@ -545,7 +545,7 @@ struct _LIBCPP_TEMPLATE_VIS pair } #endif - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void swap(pair& __p) _NOEXCEPT_(__is_nothrow_swappable::value && __is_nothrow_swappable::value) @@ -558,10 +558,10 @@ private: #ifndef _LIBCPP_CXX03_LANG template - _LIBCPP_INLINE_VISIBILITY - pair(piecewise_construct_t, - tuple<_Args1...>& __first_args, tuple<_Args2...>& __second_args, - __tuple_indices<_I1...>, __tuple_indices<_I2...>); + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + 
pair(piecewise_construct_t, + tuple<_Args1...>& __first_args, tuple<_Args2...>& __second_args, + __tuple_indices<_I1...>, __tuple_indices<_I2...>); #endif }; @@ -619,7 +619,7 @@ operator<=(const pair<_T1,_T2>& __x, const pair<_T1,_T2>& __y) } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename enable_if < __is_swappable<_T1>::value && diff --git a/libcxx/include/version b/libcxx/include/version index acedd03073cc5..dc53be3937c4c 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -42,6 +42,7 @@ __cpp_lib_concepts 201806L __cpp_lib_constexpr_misc 201811L __cpp_lib_constexpr_swap_algorithms 201806L +__cpp_lib_constexpr_utility 201811L __cpp_lib_destroying_delete 201806L __cpp_lib_enable_shared_from_this 201603L __cpp_lib_endian 201907L @@ -227,6 +228,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_concepts 201806L // # define __cpp_lib_constexpr_misc 201811L // # define __cpp_lib_constexpr_swap_algorithms 201806L +# define __cpp_lib_constexpr_utility 201811L # if _LIBCPP_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # define __cpp_lib_destroying_delete 201806L # endif diff --git a/libcxx/test/libcxx/atomics/ext-int.verify.cpp b/libcxx/test/libcxx/atomics/ext-int.verify.cpp index 7cedcf1995617..39fa574da8064 100644 --- a/libcxx/test/libcxx/atomics/ext-int.verify.cpp +++ b/libcxx/test/libcxx/atomics/ext-int.verify.cpp @@ -15,12 +15,14 @@ // UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8, clang-9, clang-10 // UNSUPPORTED: apple-clang-9, apple-clang-10, apple-clang-11, apple-clang-12 +// UNSUPPORTED: c++03 + #include int main(int, char**) { // expected-error@atomic:*1 {{_Atomic cannot be applied to integer type '_ExtInt(32)'}} - std::atomic<_ExtInt(32)> x {42}; + std::atomic<_ExtInt(32)> x(42); return 0; } diff --git a/libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp 
b/libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp new file mode 100644 index 0000000000000..cf4c5957a4183 --- /dev/null +++ b/libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// Make sure that can be included even when _XOPEN_SOURCE is defined. +// This used to trigger some bug in Apple SDKs, since timespec_get was not +// defined in but we tried using it from . +// See https://llvm.org/PR47208 for details. + +// ADDITIONAL_COMPILE_FLAGS: -D_XOPEN_SOURCE=500 + +#include diff --git a/libcxx/test/libcxx/libcpp_freestanding.sh.cpp b/libcxx/test/libcxx/libcpp_freestanding.sh.cpp new file mode 100644 index 0000000000000..5a51f1be4e823 --- /dev/null +++ b/libcxx/test/libcxx/libcpp_freestanding.sh.cpp @@ -0,0 +1,21 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Test that _LIBCPP_FREESTANDING is not defined when -ffreestanding is not passed +// to the compiler but defined when -ffreestanding is passed to the compiler. 
+ +// RUN: %{cxx} %{flags} %{compile_flags} -fsyntax-only %s +// RUN: %{cxx} %{flags} %{compile_flags} -fsyntax-only -ffreestanding -DFREESTANDING %s + +#include <__config> + +#if defined(FREESTANDING) != defined(_LIBCPP_FREESTANDING) +#error _LIBCPP_FREESTANDING should be defined in freestanding mode and not \ + defined in non-freestanding mode +#endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp index 1fd38627a715a..e595a99a749ff 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp @@ -16,6 +16,7 @@ /* Constant Value __cpp_lib_as_const 201510L [C++17] __cpp_lib_constexpr_misc 201811L [C++2a] + __cpp_lib_constexpr_utility 201811L [C++2a] __cpp_lib_exchange_function 201304L [C++14] __cpp_lib_integer_sequence 201304L [C++14] __cpp_lib_to_chars 201611L [C++17] @@ -35,6 +36,10 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should not be defined before c++14" # endif @@ -61,6 +66,10 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++14" # endif @@ -99,6 +108,10 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifndef 
__cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++17" # endif @@ -155,6 +168,13 @@ # endif # endif +# ifndef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should be defined in c++2a" +# endif +# if __cpp_lib_constexpr_utility != 201811L +# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++2a" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index 96a0fea6b9186..afbee586df3c6 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -36,6 +36,7 @@ __cpp_lib_concepts 201806L [C++2a] __cpp_lib_constexpr_misc 201811L [C++2a] __cpp_lib_constexpr_swap_algorithms 201806L [C++2a] + __cpp_lib_constexpr_utility 201811L [C++2a] __cpp_lib_destroying_delete 201806L [C++2a] __cpp_lib_enable_shared_from_this 201603L [C++17] __cpp_lib_endian 201907L [C++2a] @@ -194,6 +195,10 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++2a" # endif @@ -550,6 +555,10 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++2a" # endif @@ -1020,6 +1029,10 @@ # error 
"__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++2a" # endif @@ -1721,6 +1734,13 @@ # endif # endif +# ifndef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should be defined in c++2a" +# endif +# if __cpp_lib_constexpr_utility != 201811L +# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++2a" diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp index 078d424a1ba3e..becf36e66cf3a 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp @@ -20,33 +20,51 @@ #include "archetypes.h" #endif -int main(int, char**) -{ - { - typedef std::pair P1; - typedef std::pair P2; - P1 p1(3, static_cast(4)); - P2 p2; - p2 = p1; - assert(p2.first == 3); - assert(p2.second == 4); - } -#if TEST_STD_VER >= 11 - { - using C = TestTypes::TestType; - using P = std::pair; - using T = std::pair; - const T t(42, -42); - P p(101, 101); - C::reset_constructors(); - p = t; - assert(C::constructed == 0); - assert(C::assigned == 1); - assert(C::copy_assigned == 1); - assert(C::move_assigned == 0); - assert(p.first == 42); - assert(p.second.value == -42); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + typedef std::pair P2; + P1 p1(3, static_cast(4)); + P2 p2; + p2 = p1; + assert(p2.first == 3); + assert(p2.second == 4); 
+ } +#if TEST_STD_VER >= 20 + { + using C = ConstexprTestTypes::TestType; + using P = std::pair; + using T = std::pair; + const T t(42, -42); + P p(101, 101); + p = t; + assert(p.first == 42); + assert(p.second.value == -42); + } +#elif TEST_STD_VER >= 11 + { + using C = TestTypes::TestType; + using P = std::pair; + using T = std::pair; + const T t(42, -42); + P p(101, 101); + C::reset_constructors(); + p = t; + assert(C::constructed == 0); + assert(C::assigned == 1); + assert(C::copy_assigned == 1); + assert(C::move_assigned == 0); + assert(p.first == 42); + assert(p.second.value == -42); + } +#endif + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); #endif return 0; diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp index 303aedf50969d..94e30aec3332c 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp @@ -19,85 +19,81 @@ #include #include "test_macros.h" - - -struct NonAssignable { - NonAssignable& operator=(NonAssignable const&) = delete; - NonAssignable& operator=(NonAssignable&&) = delete; -}; -struct CopyAssignable { - CopyAssignable() = default; - CopyAssignable(CopyAssignable const&) = default; - CopyAssignable& operator=(CopyAssignable const&) = default; - CopyAssignable& operator=(CopyAssignable&&) = delete; -}; -struct MoveAssignable { - MoveAssignable() = default; - MoveAssignable& operator=(MoveAssignable const&) = delete; - MoveAssignable& operator=(MoveAssignable&&) = default; -}; +#include "archetypes.h" struct CountAssign { - static int copied; - static int moved; - static void reset() { copied = moved = 0; } - CountAssign() = default; - CountAssign& operator=(CountAssign const&) { ++copied; return *this; } - CountAssign& operator=(CountAssign&&) { ++moved; return *this; } + int 
copied = 0; + int moved = 0; + TEST_CONSTEXPR_CXX20 CountAssign() = default; + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign const&) { + ++copied; + return *this; + } + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign&&) { + ++moved; + return *this; + } }; -int CountAssign::copied = 0; -int CountAssign::moved = 0; struct Incomplete; extern Incomplete inc_obj; -int main(int, char**) -{ - { - typedef std::pair P; - const P p1(CopyAssignable(), short{4}); - P p2; - p2 = p1; - assert(p2.second == 4); - } - { - using P = std::pair; - int x = 42; - int y = 101; - int x2 = -1; - int y2 = 300; - P p1(x, std::move(y)); - P p2(x2, std::move(y2)); - p1 = p2; - assert(p1.first == x2); - assert(p1.second == y2); - } - { - using P = std::pair; - static_assert(!std::is_copy_assignable

::value, ""); - } - { - CountAssign::reset(); - using P = std::pair; - static_assert(std::is_copy_assignable

::value, ""); - P p; - P p2; - p = p2; - assert(CountAssign::copied == 1); - assert(CountAssign::moved == 0); - } - { - using P = std::pair; - static_assert(!std::is_copy_assignable

::value, ""); - } - { - using P = std::pair; - static_assert(!std::is_copy_assignable

::value, ""); - P p(42, inc_obj); - assert(&p.second == &inc_obj); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P; + const P p1(ConstexprTestTypes::CopyOnly(), short{4}); + P p2; + p2 = p1; + assert(p2.second == 4); + } + { + using P = std::pair; + int x = 42; + int y = 101; + int x2 = -1; + int y2 = 300; + P p1(x, std::move(y)); + P p2(x2, std::move(y2)); + p1 = p2; + assert(p1.first == x2); + assert(p1.second == y2); + } + { + using P = std::pair; + static_assert(!std::is_copy_assignable

::value, ""); + } + { + using P = std::pair; + static_assert(std::is_copy_assignable

::value, ""); + P p; + P p2; + p = p2; + assert(p.first.copied == 1); + assert(p.first.moved == 0); + assert(p2.first.copied == 0); + assert(p2.first.moved == 0); + } + { + using P = std::pair; + static_assert(!std::is_copy_assignable

::value, ""); + } + { + using P = std::pair; + static_assert(!std::is_copy_assignable

::value, ""); + P p(42, inc_obj); + assert(&p.second == &inc_obj); + } + + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif - return 0; + return 0; } struct Incomplete {}; diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp index 811e89015b485..cc4e4f5c1108e 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp @@ -19,81 +19,78 @@ #include #include "test_macros.h" - - -struct NonAssignable { - NonAssignable& operator=(NonAssignable const&) = delete; - NonAssignable& operator=(NonAssignable&&) = delete; -}; -struct CopyAssignable { - CopyAssignable() = default; - CopyAssignable& operator=(CopyAssignable const&) = default; - CopyAssignable& operator=(CopyAssignable&&) = delete; -}; -struct MoveAssignable { - MoveAssignable() = default; - MoveAssignable& operator=(MoveAssignable const&) = delete; - MoveAssignable& operator=(MoveAssignable&&) = default; -}; +#include "archetypes.h" struct CountAssign { - static int copied; - static int moved; - static void reset() { copied = moved = 0; } - CountAssign() = default; - CountAssign& operator=(CountAssign const&) { ++copied; return *this; } - CountAssign& operator=(CountAssign&&) { ++moved; return *this; } + int copied = 0; + int moved = 0; + TEST_CONSTEXPR_CXX20 CountAssign() = default; + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign const&) { + ++copied; + return *this; + } + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign&&) { + ++moved; + return *this; + } }; -int CountAssign::copied = 0; -int CountAssign::moved = 0; -int main(int, char**) -{ - { - typedef std::pair, int> P; - P p1(std::unique_ptr(new int(3)), 4); - P p2; - p2 = std::move(p1); - assert(*p2.first == 3); - assert(p2.second == 4); - } - { - using P = 
std::pair; - int x = 42; - int y = 101; - int x2 = -1; - int y2 = 300; - P p1(x, std::move(y)); - P p2(x2, std::move(y2)); - p1 = std::move(p2); - assert(p1.first == x2); - assert(p1.second == y2); - } - { - using P = std::pair; - static_assert(!std::is_move_assignable

::value, ""); - } - { - // The move decays to the copy constructor - CountAssign::reset(); - using P = std::pair; - static_assert(std::is_move_assignable

::value, ""); - P p; - P p2; - p = std::move(p2); - assert(CountAssign::moved == 0); - assert(CountAssign::copied == 1); - } - { - CountAssign::reset(); - using P = std::pair; - static_assert(std::is_move_assignable

::value, ""); - P p; - P p2; - p = std::move(p2); - assert(CountAssign::moved == 1); - assert(CountAssign::copied == 0); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P; + P p1(3, 4); + P p2; + p2 = std::move(p1); + assert(p2.first.value == 3); + assert(p2.second == 4); + } + { + using P = std::pair; + int x = 42; + int y = 101; + int x2 = -1; + int y2 = 300; + P p1(x, std::move(y)); + P p2(x2, std::move(y2)); + p1 = std::move(p2); + assert(p1.first == x2); + assert(p1.second == y2); + } + { + using P = std::pair; + static_assert(!std::is_move_assignable

::value, ""); + } + { + // The move decays to the copy constructor + using P = std::pair; + static_assert(std::is_move_assignable

::value, ""); + P p; + P p2; + p = std::move(p2); + assert(p.first.moved == 0); + assert(p.first.copied == 1); + assert(p2.first.moved == 0); + assert(p2.first.copied == 0); + } + { + using P = std::pair; + static_assert(std::is_move_assignable

::value, ""); + P p; + P p2; + p = std::move(p2); + assert(p.first.moved == 1); + assert(p.first.copied == 0); + assert(p2.first.moved == 0); + assert(p2.first.copied == 0); + } + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp index ed56480b2f77d..7267f7a2ef8b2 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp @@ -17,46 +17,58 @@ #include #include #include -#include #include "test_macros.h" +#include "archetypes.h" -struct Base -{ - virtual ~Base() {} +struct Derived : ConstexprTestTypes::MoveOnly { + Derived() = default; + TEST_CONSTEXPR_CXX20 Derived(ConstexprTestTypes::MoveOnly&&){}; }; - -struct Derived - : public Base -{ +struct CountAssign { + int copied = 0; + int moved = 0; + TEST_CONSTEXPR_CXX20 CountAssign() = default; + TEST_CONSTEXPR_CXX20 CountAssign(const int){}; + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign const&) { + ++copied; + return *this; + } + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign&&) { + ++moved; + return *this; + } }; -int main(int, char**) -{ - { - typedef std::pair, short> P1; - typedef std::pair, long> P2; - P1 p1(std::unique_ptr(), static_cast(4)); - P2 p2; - p2 = std::move(p1); - assert(p2.first == nullptr); - assert(p2.second == 4); - } - { - using C = TestTypes::TestType; - using P = std::pair; - using T = std::pair; - T t(42, -42); - P p(101, 101); - C::reset_constructors(); - p = std::move(t); - assert(C::constructed == 0); - assert(C::assigned == 1); - assert(C::copy_assigned == 0); - assert(C::move_assigned == 1); - assert(p.first == 42); - assert(p.second.value == -42); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair 
P1; + typedef std::pair P2; + P1 p1(Derived(), static_cast(4)); + P2 p2; + p2 = std::move(p1); + assert(p2.second == 4); + } + { + using P = std::pair; + using T = std::pair; + T t(42, -42); + P p(101, 101); + p = std::move(t); + assert(p.first == 42); + assert(p.second.moved == 1); + assert(p.second.copied == 0); + assert(t.second.moved == 0); + assert(t.second.copied == 0); + } + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp index 358689ed0b97f..42ddb4e6058b2 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp @@ -23,18 +23,16 @@ #include "test_macros.h" using namespace ImplicitTypes; // Get implicitly archetypes -template -void test_pair_const() -{ - using P1 = std::pair; - using P2 = std::pair; - using UP1 = std::pair const&; - using UP2 = std::pair const&; - static_assert(std::is_constructible::value == CanCopy, ""); - static_assert(test_convertible() == CanConvert, ""); - static_assert(std::is_constructible::value == CanCopy, ""); - static_assert(test_convertible() == CanConvert, ""); +template +TEST_CONSTEXPR_CXX20 void test_pair_const() { + using P1 = std::pair; + using P2 = std::pair; + using UP1 = std::pair const&; + using UP2 = std::pair const&; + static_assert(std::is_constructible::value == CanCopy, ""); + static_assert(test_convertible() == CanConvert, ""); + static_assert(std::is_constructible::value == CanCopy, ""); + static_assert(test_convertible() == CanConvert, ""); } template @@ -55,104 +53,115 @@ struct ImplicitT { int value; }; -int main(int, char**) -{ - { - typedef std::pair P1; - typedef std::pair P2; - const P1 p1(3, 4); - const P2 p2 = p1; - assert(p2.first == 3); - assert(p2.second == 
4); - } - { - // We allow derived types to use this constructor - using P1 = DPair; - using P2 = std::pair; - P1 p1(42, 101); - P2 p2(p1); - assert(p2.first == 42); - assert(p2.second == 101); - } - { - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - } - - { // Test construction of references - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - // Unfortunately the below conversions are allowed and create dangling - // references. - //test_pair_const(); - //test_pair_const(); - //test_pair_const(); - // But these are not because the converting constructor is explicit. 
- test_pair_const(); - test_pair_const(); - test_pair_const(); - - } - { - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + typedef std::pair P2; + const P1 p1(3, 4); + const P2 p2 = p1; + assert(p2.first == 3); + assert(p2.second == 4); + } + { + // We allow derived types to use this constructor + using P1 = DPair; + using P2 = std::pair; + P1 p1(42, 101); + P2 p2(p1); + assert(p2.first == 42); + assert(p2.second == 101); + } + { + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + } + + { // Test construction of references + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + // Unfortunately the below conversions are allowed and create dangling + // references. + //test_pair_const(); + //test_pair_const(); + //test_pair_const(); + // But these are not because the converting constructor is explicit. 
+ test_pair_const(); + test_pair_const(); + test_pair_const(); + } + { + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + } #if TEST_STD_VER > 11 { typedef std::pair P1; @@ -171,14 +180,22 @@ int main(int, char**) static_assert(p2.second.value == 101, ""); } { - using P1 = std::pair; - using P2 = std::pair; - constexpr P1 p1(42, 101); - constexpr P2 p2 = p1; - static_assert(p2.first.value == 42, ""); - static_assert(p2.second.value == 101, ""); + using P1 = std::pair; + using P2 = std::pair; + constexpr P1 p1(42, 101); + constexpr P2 p2 = p1; + static_assert(p2.first.value == 42, ""); + static_assert(p2.second.value == 101, ""); } #endif + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp index 203209d5e6346..37bbb9501769c 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp @@ -22,18 +22,24 @@ #include "test_macros.h" +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + typedef std::pair P2; + typedef std::pair P3; + P3 p3(std::piecewise_construct, std::tuple(3, nullptr), + std::tuple(nullptr, 4)); + assert(p3.first == P1(3, nullptr)); + assert(p3.second == P2(nullptr, 4)); + } + return true; +} -int main(int, char**) -{ - { - typedef std::pair P1; - typedef std::pair P2; - typedef std::pair P3; - P3 p3(std::piecewise_construct, std::tuple(3, nullptr), - std::tuple(nullptr, 4)); - assert(p3.first 
== P1(3, nullptr)); - assert(p3.second == P2(nullptr, 4)); - } +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp index 9a497e5ac532d..a2d720ff42ea6 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp @@ -19,35 +19,40 @@ struct S { int i; - S() : i(0) {} - S(int j) : i(j) {} - S * operator& () { assert(false); return this; } - S const * operator& () const { assert(false); return this; } - bool operator==(int x) const { return i == x; } - }; - -int main(int, char**) -{ - { - typedef std::pair P1; - P1 p1(3, static_cast(4)); - P1 p2(5, static_cast(6)); - p1.swap(p2); - assert(p1.first == 5); - assert(p1.second == 6); - assert(p2.first == 3); - assert(p2.second == 4); - } - { - typedef std::pair P1; - P1 p1(3, S(4)); - P1 p2(5, S(6)); - p1.swap(p2); - assert(p1.first == 5); - assert(p1.second == 6); - assert(p2.first == 3); - assert(p2.second == 4); - } + TEST_CONSTEXPR_CXX20 S() : i(0) {} + TEST_CONSTEXPR_CXX20 S(int j) : i(j) {} + TEST_CONSTEXPR_CXX20 bool operator==(int x) const { return i == x; } +}; + +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + P1 p1(3, static_cast(4)); + P1 p2(5, static_cast(6)); + p1.swap(p2); + assert(p1.first == 5); + assert(p1.second == 6); + assert(p2.first == 3); + assert(p2.second == 4); + } + { + typedef std::pair P1; + P1 p1(3, S(4)); + P1 p2(5, S(6)); + p1.swap(p2); + assert(p1.first == 5); + assert(p1.second == 6); + assert(p2.first == 3); + assert(p2.second == 4); + } + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/support/emplace_constructible.h b/libcxx/test/support/emplace_constructible.h index 
f0d11ba76c87d..42a62fabe656d 100644 --- a/libcxx/test/support/emplace_constructible.h +++ b/libcxx/test/support/emplace_constructible.h @@ -7,7 +7,7 @@ template struct EmplaceConstructible { T value; - explicit EmplaceConstructible(T xvalue) : value(xvalue) {} + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructible(T xvalue) : value(xvalue) {} EmplaceConstructible(EmplaceConstructible const&) = delete; }; @@ -15,9 +15,9 @@ template struct EmplaceConstructibleAndMoveInsertable { int copied = 0; T value; - explicit EmplaceConstructibleAndMoveInsertable(T xvalue) : value(xvalue) {} + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructibleAndMoveInsertable(T xvalue) : value(xvalue) {} - EmplaceConstructibleAndMoveInsertable( + TEST_CONSTEXPR_CXX14 EmplaceConstructibleAndMoveInsertable( EmplaceConstructibleAndMoveInsertable&& Other) : copied(Other.copied + 1), value(std::move(Other.value)) {} }; @@ -27,13 +27,13 @@ struct EmplaceConstructibleAndMoveable { int copied = 0; int assigned = 0; T value; - explicit EmplaceConstructibleAndMoveable(T xvalue) noexcept : value(xvalue) {} + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructibleAndMoveable(T xvalue) noexcept : value(xvalue) {} - EmplaceConstructibleAndMoveable(EmplaceConstructibleAndMoveable&& Other) + TEST_CONSTEXPR_CXX14 EmplaceConstructibleAndMoveable(EmplaceConstructibleAndMoveable&& Other) noexcept : copied(Other.copied + 1), value(std::move(Other.value)) {} - EmplaceConstructibleAndMoveable& + TEST_CONSTEXPR_CXX14 EmplaceConstructibleAndMoveable& operator=(EmplaceConstructibleAndMoveable&& Other) noexcept { copied = Other.copied; assigned = Other.assigned + 1; @@ -47,15 +47,15 @@ struct EmplaceConstructibleMoveableAndAssignable { int copied = 0; int assigned = 0; T value; - explicit EmplaceConstructibleMoveableAndAssignable(T xvalue) noexcept + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructibleMoveableAndAssignable(T xvalue) noexcept : value(xvalue) {} - EmplaceConstructibleMoveableAndAssignable( + TEST_CONSTEXPR_CXX14 
EmplaceConstructibleMoveableAndAssignable( EmplaceConstructibleMoveableAndAssignable&& Other) noexcept : copied(Other.copied + 1), value(std::move(Other.value)) {} - EmplaceConstructibleMoveableAndAssignable& + TEST_CONSTEXPR_CXX14 EmplaceConstructibleMoveableAndAssignable& operator=(EmplaceConstructibleMoveableAndAssignable&& Other) noexcept { copied = Other.copied; assigned = Other.assigned + 1; @@ -63,7 +63,7 @@ struct EmplaceConstructibleMoveableAndAssignable { return *this; } - EmplaceConstructibleMoveableAndAssignable& operator=(T xvalue) { + TEST_CONSTEXPR_CXX14 EmplaceConstructibleMoveableAndAssignable& operator=(T xvalue) { value = std::move(xvalue); ++assigned; return *this; diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h index fd23fc4383f34..b9f6f6147609c 100644 --- a/libcxx/test/support/min_allocator.h +++ b/libcxx/test/support/min_allocator.h @@ -220,19 +220,19 @@ class min_pointer void* ptr_; public: min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} template ::value >::type > - min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} + TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} - explicit operator bool() const {return ptr_ != nullptr;} + TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} + TEST_CONSTEXPR_CXX14 friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} + TEST_CONSTEXPR_CXX14 friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} template friend class min_pointer; }; @@ -241,13 +241,13 @@ class min_pointer { T* ptr_; - explicit min_pointer(T* p) TEST_NOEXCEPT : ptr_(p) {} + TEST_CONSTEXPR_CXX14 explicit 
min_pointer(T* p) TEST_NOEXCEPT : ptr_(p) {} public: min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} - explicit min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(static_cast(p.ptr_)) {} + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(static_cast(p.ptr_)) {} - explicit operator bool() const {return ptr_ != nullptr;} + TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} typedef std::ptrdiff_t difference_type; typedef T& reference; @@ -255,53 +255,53 @@ class min_pointer typedef T value_type; typedef std::random_access_iterator_tag iterator_category; - reference operator*() const {return *ptr_;} - pointer operator->() const {return ptr_;} + TEST_CONSTEXPR_CXX14 reference operator*() const {return *ptr_;} + TEST_CONSTEXPR_CXX14 pointer operator->() const {return ptr_;} - min_pointer& operator++() {++ptr_; return *this;} - min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator++() {++ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} - min_pointer& operator--() {--ptr_; return *this;} - min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator--() {--ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} - min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} - min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} - min_pointer operator+(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer 
operator+(difference_type n) const { min_pointer tmp(*this); tmp += n; return tmp; } - friend min_pointer operator+(difference_type n, min_pointer x) + friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) { return x + n; } - min_pointer operator-(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const { min_pointer tmp(*this); tmp -= n; return tmp; } - friend difference_type operator-(min_pointer x, min_pointer y) + friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) { return x.ptr_ - y.ptr_; } - reference operator[](difference_type n) const {return ptr_[n];} + TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return ptr_[n];} - friend bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} - friend bool operator> (min_pointer x, min_pointer y) {return y < x;} - friend bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} - friend bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} + friend TEST_CONSTEXPR_CXX14 bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator> (min_pointer x, min_pointer y) {return y < x;} + friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} + friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} - static min_pointer pointer_to(T& t) {return min_pointer(std::addressof(t));} + static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(T& t) {return min_pointer(std::addressof(t));} - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} template friend class 
min_pointer; template friend class min_allocator; }; @@ -311,14 +311,14 @@ class min_pointer { const T* ptr_; - explicit min_pointer(const T* p) : ptr_(p) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(const T* p) : ptr_(p) {} public: min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) : ptr_(nullptr) {} - min_pointer(min_pointer p) : ptr_(p.ptr_) {} - explicit min_pointer(min_pointer p) : ptr_(static_cast(p.ptr_)) {} + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) : ptr_(p.ptr_) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) : ptr_(static_cast(p.ptr_)) {} - explicit operator bool() const {return ptr_ != nullptr;} + TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} typedef std::ptrdiff_t difference_type; typedef const T& reference; @@ -326,58 +326,58 @@ class min_pointer typedef const T value_type; typedef std::random_access_iterator_tag iterator_category; - reference operator*() const {return *ptr_;} - pointer operator->() const {return ptr_;} + TEST_CONSTEXPR_CXX14 reference operator*() const {return *ptr_;} + TEST_CONSTEXPR_CXX14 pointer operator->() const {return ptr_;} - min_pointer& operator++() {++ptr_; return *this;} - min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator++() {++ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} - min_pointer& operator--() {--ptr_; return *this;} - min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator--() {--ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} - min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} - min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& 
operator+=(difference_type n) {ptr_ += n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} - min_pointer operator+(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const { min_pointer tmp(*this); tmp += n; return tmp; } - friend min_pointer operator+(difference_type n, min_pointer x) + friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) { return x + n; } - min_pointer operator-(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const { min_pointer tmp(*this); tmp -= n; return tmp; } - friend difference_type operator-(min_pointer x, min_pointer y) + friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) { return x.ptr_ - y.ptr_; } - reference operator[](difference_type n) const {return ptr_[n];} + TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return ptr_[n];} - friend bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} - friend bool operator> (min_pointer x, min_pointer y) {return y < x;} - friend bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} - friend bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} + friend TEST_CONSTEXPR_CXX14 bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator> (min_pointer x, min_pointer y) {return y < x;} + friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} + friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} - static min_pointer pointer_to(const T& t) {return min_pointer(std::addressof(t));} + static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(const T& t) {return min_pointer(std::addressof(t));} - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer 
y) {return !(x == y);} + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} template friend class min_pointer; }; template -inline +TEST_CONSTEXPR_CXX14 inline bool operator==(min_pointer x, std::nullptr_t) { @@ -385,7 +385,7 @@ operator==(min_pointer x, std::nullptr_t) } template -inline +TEST_CONSTEXPR_CXX14 inline bool operator==(std::nullptr_t, min_pointer x) { @@ -393,7 +393,7 @@ operator==(std::nullptr_t, min_pointer x) } template -inline +TEST_CONSTEXPR_CXX14 inline bool operator!=(min_pointer x, std::nullptr_t) { @@ -401,7 +401,7 @@ operator!=(min_pointer x, std::nullptr_t) } template -inline +TEST_CONSTEXPR_CXX14 inline bool operator!=(std::nullptr_t, min_pointer x) { @@ -417,20 +417,20 @@ class min_allocator min_allocator() = default; template - min_allocator(min_allocator) {} + TEST_CONSTEXPR_CXX20 min_allocator(min_allocator) {} - pointer allocate(std::ptrdiff_t n) + TEST_CONSTEXPR_CXX20 pointer allocate(std::ptrdiff_t n) { - return pointer(static_cast(::operator new(n*sizeof(T)))); + return pointer(std::allocator().allocate(n)); } - void deallocate(pointer p, std::ptrdiff_t) + TEST_CONSTEXPR_CXX20 void deallocate(pointer p, std::ptrdiff_t n) { - return ::operator delete(p.ptr_); + std::allocator().deallocate(p.ptr_, n); } - friend bool operator==(min_allocator, min_allocator) {return true;} - friend bool operator!=(min_allocator x, min_allocator y) {return !(x == y);} + TEST_CONSTEXPR_CXX20 friend bool operator==(min_allocator, min_allocator) {return true;} + TEST_CONSTEXPR_CXX20 friend bool operator!=(min_allocator x, min_allocator y) {return !(x == y);} }; template @@ -439,23 +439,23 @@ class explicit_allocator public: typedef T value_type; - explicit_allocator() TEST_NOEXCEPT {} + TEST_CONSTEXPR_CXX20 explicit_allocator() TEST_NOEXCEPT {} template - explicit explicit_allocator(explicit_allocator) 
TEST_NOEXCEPT {} + TEST_CONSTEXPR_CXX20 explicit explicit_allocator(explicit_allocator) TEST_NOEXCEPT {} - T* allocate(std::size_t n) + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { - return static_cast(::operator new(n*sizeof(T))); + return static_cast(std::allocator().allocate(n)); } - void deallocate(T* p, std::size_t) + TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { - return ::operator delete(static_cast(p)); + std::allocator().deallocate(p, n); } - friend bool operator==(explicit_allocator, explicit_allocator) {return true;} - friend bool operator!=(explicit_allocator x, explicit_allocator y) {return !(x == y);} + TEST_CONSTEXPR_CXX20 friend bool operator==(explicit_allocator, explicit_allocator) {return true;} + TEST_CONSTEXPR_CXX20 friend bool operator!=(explicit_allocator x, explicit_allocator y) {return !(x == y);} }; #endif // MIN_ALLOCATOR_H diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b77f88489d9d5..6ad1a18569893 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -607,6 +607,12 @@ def add_version_header(tc): "depends": "defined(__cpp_concepts) && __cpp_concepts >= 201811L", "internal_depends": "defined(__cpp_concepts) && __cpp_concepts >= 201811L", }, + {"name": "__cpp_lib_constexpr_utility", + "values": { + "c++2a": int(201811), + }, + "headers": ["utility"], + }, ]], key=lambda tc: tc["name"]) def get_std_dialects(): diff --git a/libunwind/src/Unwind_AppleExtras.cpp b/libunwind/src/Unwind_AppleExtras.cpp index 1d9948aced355..e3d41ca2b4e92 100644 --- a/libunwind/src/Unwind_AppleExtras.cpp +++ b/libunwind/src/Unwind_AppleExtras.cpp @@ -8,8 +8,6 @@ //===----------------------------------------------------------------------===// #include "config.h" -#include "AddressSpace.hpp" -#include "DwarfParser.hpp" // static linker symbols to prevent wrong two level namespace for 
_Unwind symbols diff --git a/libunwind/src/config.h b/libunwind/src/config.h index 2014b8cb77abd..fd177dd7338c1 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -18,6 +18,8 @@ #include #include +#include <__libunwind_config.h> + // Platform specific configuration defines. #ifdef __APPLE__ #if defined(FOR_DYLD) @@ -33,7 +35,7 @@ #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif #else - #if defined(__ARM_DWARF_EH__) || !defined(__arm__) + #if !defined(_LIBUNWIND_ARM_EHABI) #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 #endif @@ -81,6 +83,8 @@ #error Unsupported target #endif +// Apple/armv7k defaults to DWARF/Compact unwinding, but its libunwind also +// needs to include the SJLJ APIs. #if (defined(__APPLE__) && defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) #define _LIBUNWIND_BUILD_SJLJ_APIS #endif diff --git a/lld/CODE_OWNERS.TXT b/lld/CODE_OWNERS.TXT index f019a87553aa0..44972c0d345a8 100644 --- a/lld/CODE_OWNERS.TXT +++ b/lld/CODE_OWNERS.TXT @@ -15,8 +15,12 @@ D: COFF, ELF backends (COFF/* ELF/*) N: Lang Hames, Nick Kledzik E: lhames@gmail.com, kledzik@apple.com -D: Mach-O backend +D: Old Mach-O backend N: Sam Clegg E: sbc@chromium.org D: WebAssembly backend (wasm/*) + +N: Jez Ng, Greg McGary, Shoaib Meenai +E: jezng@fb.com, gkm@fb.com, smeenai@fb.com +D: New Mach-O backend diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index bfc8e9c1e53b1..acdb5c71efb96 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" +#include "llvm/Support/RISCVAttributeParser.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" @@ -867,10 +868,7 @@ template InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { StringRef name = getSectionName(sec); - switch (sec.sh_type) { - case SHT_ARM_ATTRIBUTES: { - if (config->emachine != EM_ARM) - 
break; + if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) { ARMAttributeParser attributes; ArrayRef contents = check(this->getObj().getSectionContents(&sec)); if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind @@ -878,20 +876,45 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { : support::big)) { auto *isec = make(*this, sec, name); warn(toString(isec) + ": " + llvm::toString(std::move(e))); - break; + } else { + updateSupportedARMFeatures(attributes); + updateARMVFPArgs(attributes, this); + + // FIXME: Retain the first attribute section we see. The eglibc ARM + // dynamic loaders require the presence of an attribute section for dlopen + // to work. In a full implementation we would merge all attribute + // sections. + if (in.attributes == nullptr) { + in.attributes = make(*this, sec, name); + return in.attributes; + } + return &InputSection::discarded; } - updateSupportedARMFeatures(attributes); - updateARMVFPArgs(attributes, this); - - // FIXME: Retain the first attribute section we see. The eglibc ARM - // dynamic loaders require the presence of an attribute section for dlopen - // to work. In a full implementation we would merge all attribute sections. - if (in.armAttributes == nullptr) { - in.armAttributes = make(*this, sec, name); - return in.armAttributes; + } + + if (config->emachine == EM_RISCV && sec.sh_type == SHT_RISCV_ATTRIBUTES) { + RISCVAttributeParser attributes; + ArrayRef contents = check(this->getObj().getSectionContents(&sec)); + if (Error e = attributes.parse(contents, support::little)) { + auto *isec = make(*this, sec, name); + warn(toString(isec) + ": " + llvm::toString(std::move(e))); + } else { + // FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is + // present. + + // FIXME: Retain the first attribute section we see. Tools such as + // llvm-objdump make use of the attribute section to determine which + // standard extensions to enable. 
In a full implementation we would merge + // all attribute sections. + if (in.attributes == nullptr) { + in.attributes = make(*this, sec, name); + return in.attributes; + } + return &InputSection::discarded; } - return &InputSection::discarded; } + + switch (sec.sh_type) { case SHT_LLVM_DEPENDENT_LIBRARIES: { if (config->relocatable) break; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 7779efcd5fe5a..8943596179c17 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1200,7 +1200,7 @@ inline Partition &SectionBase::getPartition() const { // Linker generated sections which can be used as inputs and are not specific to // a partition. struct InStruct { - InputSection *armAttributes; + InputSection *attributes; BssSection *bss; BssSection *bssRelRo; GotSection *got; diff --git a/lld/test/ELF/ppc64-pcrel-long-branch-error.s b/lld/test/ELF/ppc64-pcrel-long-branch-error.s index b9904909742d0..2db9d15b70d07 100644 --- a/lld/test/ELF/ppc64-pcrel-long-branch-error.s +++ b/lld/test/ELF/ppc64-pcrel-long-branch-error.s @@ -1,22 +1,23 @@ -## The test as-is needs a large heap size. -## Disabled until we know how to check for that prerequisite. -# UNSUPPORTED: ppc - -# REQUIRES: ppc +# REQUIRES: ppc, system-linux # RUN: echo 'SECTIONS { \ # RUN: .text_low 0x2000: { *(.text_low) } \ -# RUN: .text_high 0x800002000 : { *(.text_high) } \ +# RUN: .text_high 0x200002010 : { *(.text_high) } \ # RUN: }' > %t.script +## In this test, we do not use -o /dev/null like other similar cases do since +## it will fail in some environments with out-of-memory errors associated with +## buffering the output in memory. The test is enabled for ppc linux only since +## writing to an allocated file will cause time out error for this case on freebsd.
+ # RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o -# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=ppc64le -defsym HIDDEN=1 %s -o %t.o -# RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared -T %t.script %t.o -o %t 2>&1 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=ppc64 %s -o %t.o -# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=ppc64 -defsym HIDDEN=1 %s -o %t.o -# RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared -T %t.script %t.o -o %t 2>&1 | FileCheck %s # CHECK: error: offset overflow 34 bits, please compile using the large code model diff --git a/lld/test/ELF/riscv-attributes.s b/lld/test/ELF/riscv-attributes.s new file mode 100644 index 0000000000000..36e506a1df662 --- /dev/null +++ b/lld/test/ELF/riscv-attributes.s @@ -0,0 +1,33 @@ +# REQUIRES: riscv + +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=-relax %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf --arch-specific %t | FileCheck %s +# RUN: ld.lld %t.o %t.o -o %t2 +# RUN: llvm-readelf --arch-specific %t2 | FileCheck %s + +# CHECK: BuildAttributes { +# CHECK-NEXT: FormatVersion: 0x41 +# CHECK-NEXT: Section 1 { +# CHECK-NEXT: SectionLength: 52 +# CHECK-NEXT: Vendor: riscv +# CHECK-NEXT: Tag: Tag_File (0x1) +# CHECK-NEXT: Size: 42 +# CHECK-NEXT: FileAttributes { +# CHECK-NEXT: Attribute { +# CHECK-NEXT: Tag: 4 +# CHECK-NEXT: Value: 16 +# CHECK-NEXT: TagName: stack_align +# CHECK-NEXT: Description: Stack alignment is 16-bytes +# CHECK-NEXT: } +# CHECK-NEXT: Attribute { +# CHECK-NEXT: Tag: 5 +# CHECK-NEXT: TagName: arch +# CHECK-NEXT: Value: rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0 +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: } +# 
CHECK-NEXT: } + +.attribute 4, 16 +.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0" diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s index 6acd1ca4a72da..bed04eecd4a9f 100644 --- a/lld/test/MachO/local-got.s +++ b/lld/test/MachO/local-got.s @@ -11,12 +11,12 @@ ## Check that the GOT references the cstrings. --full-contents displays the ## address offset and the contents at that address very similarly, so am using ## --match-full-lines to make sure we match on the right thing. -# CHECK: Contents of section __cstring: +# CHECK: Contents of section __TEXT,__cstring: # CHECK-NEXT: 10000040c {{.*}} ## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the ## start of __cstring -# CHECK: Contents of section __got: +# CHECK: Contents of section __DATA_CONST,__got: # CHECK-NEXT: [[#%X,ADDR:]] 1a040000 01000000 0c040000 01000000 {{.*}} # CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}} diff --git a/lld/test/MachO/relocations.s b/lld/test/MachO/relocations.s index 006df404242b7..e6f7778413d5b 100644 --- a/lld/test/MachO/relocations.s +++ b/lld/test/MachO/relocations.s @@ -20,7 +20,7 @@ # CHECK-NEXT: [[#%x, CSTRING_ADDR + 22 - LSTR_OFF]] # RUN: llvm-objdump --section=__const --full-contents %t | FileCheck %s --check-prefix=NONPCREL -# NONPCREL: Contents of section __const: +# NONPCREL: Contents of section __DATA,__const: # NONPCREL-NEXT: 100001000 f0030000 01000000 f0030000 01000000 .section __TEXT,__text diff --git a/lld/test/MachO/sectcreate.s b/lld/test/MachO/sectcreate.s index ac561d88110ba..482e3e5572dc9 100644 --- a/lld/test/MachO/sectcreate.s +++ b/lld/test/MachO/sectcreate.s @@ -10,13 +10,13 @@ # RUN: -o %t %t.o # RUN: llvm-objdump -s %t | FileCheck %s -# CHECK: Contents of section __text: -# CHECK: Contents of section __data: +# CHECK: Contents of section __TEXT,__text: +# CHECK: Contents of section __DATA,__data: # CHECK: my string!. 
-# CHECK: Contents of section SEC1: +# CHECK: Contents of section SEG,SEC1: # CHECK: -sectcreate 1.1. # CHECK: -sectcreate 1.2. -# CHECK: Contents of section SEC2: +# CHECK: Contents of section SEG,SEC2: # CHECK: -sectcreate 2. .text diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s index 69c55a047b490..34c19d769b945 100644 --- a/lld/test/MachO/section-merge.s +++ b/lld/test/MachO/section-merge.s @@ -15,7 +15,7 @@ # CHECK-DAG: {{0*}}[[#ADDR+0x8]] g O __DATA,__data _baz # CHECK-DAG: {{0*}}[[#ADDR+0x9]] g O __DATA,__data _qux -# CHECK: Contents of section __data: +# CHECK: Contents of section __DATA,__data: # CHECK-NEXT: {{0*}}[[#ADDR]] ca000000 fe000000 baefbead de000000 .section __TEXT,__text diff --git a/lld/test/MachO/weak-binding.s b/lld/test/MachO/weak-binding.s index 3474d35ce921b..fc4106bf953b6 100644 --- a/lld/test/MachO/weak-binding.s +++ b/lld/test/MachO/weak-binding.s @@ -7,13 +7,13 @@ # RUN: llvm-objdump -d --no-show-raw-insn --bind --lazy-bind --weak-bind --full-contents %t/test | \ # RUN: FileCheck %s -# CHECK: Contents of section __la_symbol_ptr: +# CHECK: Contents of section __DATA,__la_symbol_ptr: ## Check that this section contains a nonzero pointer. It should point to ## _weak_external_fn, but we don't have a good way of testing the exact value as ## the bytes here are in little-endian order. # CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} -# CHECK: Contents of section __got: +# CHECK: Contents of section __DATA_CONST,__got: ## Check that this section contains a nonzero pointer. It should point to ## _weak_external_for_gotpcrel. 
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} diff --git a/lld/test/MachO/x86-64-reloc-unsigned.s b/lld/test/MachO/x86-64-reloc-unsigned.s index 52a3d536139c9..211a64b9ad3d2 100644 --- a/lld/test/MachO/x86-64-reloc-unsigned.s +++ b/lld/test/MachO/x86-64-reloc-unsigned.s @@ -2,9 +2,9 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o # RUN: llvm-objdump --full-contents %t | FileCheck %s -# CHECK: Contents of section foo: +# CHECK: Contents of section __DATA,foo: # CHECK: 100001000 08100000 01000000 -# CHECK: Contents of section bar: +# CHECK: Contents of section __DATA,bar: # CHECK: 100001008 011000f0 11211111 02000000 .globl _main, _foo, _bar diff --git a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml index 25ba88307fc18..5ad2815b9cd90 100644 --- a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml +++ b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml @@ -194,7 +194,7 @@ page-size: 0x00000000 # Also make sure the reloc for the FDE->function is the correct offset # It should be the offset from the fixup location back to the address # of the function we are referencing -# CODE: Contents of section __eh_frame: +# CODE: Contents of section __TEXT,__eh_frame: # This is the CIE: # CODE-NEXT: {{[0-9abcdef]*}} 1c000000 00000000 017a504c 52000178 # CODE-NEXT: {{[0-9abcdef]*}} 1e0700bd ffffffff ffffff00 100c1f00 diff --git a/lld/test/mach-o/eh-frame-relocs-arm64.yaml b/lld/test/mach-o/eh-frame-relocs-arm64.yaml index f8d538d7c109f..e669aaf8109b5 100644 --- a/lld/test/mach-o/eh-frame-relocs-arm64.yaml +++ b/lld/test/mach-o/eh-frame-relocs-arm64.yaml @@ -303,7 +303,7 @@ page-size: 0x00000000 # correct offset # It should be the offset from the fixup location back to the address # of the function we are referencing -# CODE: Contents of section __eh_frame: +# CODE: Contents of section __TEXT,__eh_frame: # This is the CIE: # CODE-NEXT: 
{{[0-9abcdef]*}} 18000000 00000000 037a504c 52000178 # CODE-NEXT: {{[0-9abcdef]*}} 1e079bd1 ffffff10 100c1f00 28000000 @@ -315,4 +315,4 @@ page-size: 0x00000000 # And a new CIE starts at this 00000018 right below here # CODE-NEXT: {{[0-9abcdef]*}} 019d0200 00000000 18000000 00000000 # CODE-NEXT: {{[0-9abcdef]*}} 037a504c 52000178 1e079b8d ffffff10 -# This is the important offset for its CIE->pfunc ^~~~~~~~~ \ No newline at end of file +# This is the important offset for its CIE->pfunc ^~~~~~~~~ diff --git a/lldb/bindings/interface/SBPlatform.i b/lldb/bindings/interface/SBPlatform.i index 81945222c059a..07aecfc354bb2 100644 --- a/lldb/bindings/interface/SBPlatform.i +++ b/lldb/bindings/interface/SBPlatform.i @@ -45,6 +45,7 @@ public: class SBPlatformShellCommand { public: + SBPlatformShellCommand (const char *shell, const char *shell_command); SBPlatformShellCommand (const char *shell_command); SBPlatformShellCommand (const SBPlatformShellCommand &rhs); @@ -54,6 +55,12 @@ public: void Clear(); + const char * + GetShell(); + + void + SetShell(const char *shell_interpreter); + const char * GetCommand(); diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index 4d251b1299546..98291f18247dc 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -51,6 +51,7 @@ class LLDB_API SBPlatformConnectOptions { class LLDB_API SBPlatformShellCommand { public: + SBPlatformShellCommand(const char *shell, const char *shell_command); SBPlatformShellCommand(const char *shell_command); SBPlatformShellCommand(const SBPlatformShellCommand &rhs); @@ -61,6 +62,10 @@ class LLDB_API SBPlatformShellCommand { void Clear(); + const char *GetShell(); + + void SetShell(const char *shell); + const char *GetCommand(); void SetCommand(const char *shell_command); diff --git a/lldb/include/lldb/API/SBReproducer.h b/lldb/include/lldb/API/SBReproducer.h index 78044e9acbc31..5578162412c8b 100644 --- a/lldb/include/lldb/API/SBReproducer.h 
+++ b/lldb/include/lldb/API/SBReproducer.h @@ -11,8 +11,32 @@ #include "lldb/API/SBDefines.h" +namespace lldb_private { +namespace repro { +struct ReplayOptions; +} +} // namespace lldb_private + namespace lldb { +class LLDB_API SBReplayOptions { +public: + SBReplayOptions(); + SBReplayOptions(const SBReplayOptions &rhs); + ~SBReplayOptions(); + + SBReplayOptions &operator=(const SBReplayOptions &rhs); + + void SetVerify(bool verify); + bool GetVerify() const; + + void SetCheckVersion(bool check); + bool GetCheckVersion() const; + +private: + std::unique_ptr m_opaque_up; +}; + /// The SBReproducer class is special because it bootstraps the capture and /// replay of SB API calls. As a result we cannot rely on any other SB objects /// in the interface or implementation of this class. @@ -22,6 +46,7 @@ class LLDB_API SBReproducer { static const char *Capture(const char *path); static const char *Replay(const char *path); static const char *Replay(const char *path, bool skip_version_check); + static const char *Replay(const char *path, const SBReplayOptions &options); static const char *PassiveReplay(const char *path); static const char *GetPath(); static bool SetAutoGenerate(bool b); diff --git a/lldb/include/lldb/Host/Host.h b/lldb/include/lldb/Host/Host.h index f19cb85d2329c..76792cc6eab56 100644 --- a/lldb/include/lldb/Host/Host.h +++ b/lldb/include/lldb/Host/Host.h @@ -196,19 +196,34 @@ class Host { static Status ShellExpandArguments(ProcessLaunchInfo &launch_info); /// Run a shell command. 
- /// \arg command shouldn't be NULL + /// \arg command shouldn't be empty /// \arg working_dir Pass empty FileSpec to use the current working directory /// \arg status_ptr Pass NULL if you don't want the process exit status /// \arg signo_ptr Pass NULL if you don't want the signal that caused the /// process to exit /// \arg command_output Pass NULL if you don't want the command output /// \arg hide_stderr if this is false, redirect stderr to stdout - /// TODO: Convert this function to take a StringRef. - static Status RunShellCommand(const char *command, + static Status RunShellCommand(llvm::StringRef command, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout, - bool run_in_default_shell = true, + bool run_in_shell = true, + bool hide_stderr = false); + + /// Run a shell command. + /// \arg shell Pass an empty string if you want to use the default shell + /// interpreter \arg command \arg working_dir Pass empty FileSpec to use the + /// current working directory \arg status_ptr Pass NULL if you don't want + /// the process exit status \arg signo_ptr Pass NULL if you don't want the + /// signal that caused + /// the process to exit + /// \arg command_output Pass NULL if you don't want the command output + /// \arg hide_stderr If this is \b false, redirect stderr to stdout + static Status RunShellCommand(llvm::StringRef shell, llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output, + const Timeout &timeout, + bool run_in_shell = true, bool hide_stderr = false); /// Run a shell command. @@ -222,7 +237,23 @@ class Host { int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout, - bool run_in_default_shell = true, + bool run_in_shell = true, + bool hide_stderr = false); + + /// Run a shell command. 
+ /// \arg shell Pass an empty string if you want to use the default + /// shell interpreter \arg command \arg working_dir Pass empty FileSpec to use + /// the current working directory \arg status_ptr Pass NULL if you don't + /// want the process exit status \arg signo_ptr Pass NULL if you don't + /// want the signal that caused the + /// process to exit + /// \arg command_output Pass NULL if you don't want the command output + /// \arg hide_stderr If this is \b false, redirect stderr to stdout + static Status RunShellCommand(llvm::StringRef shell, const Args &args, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output, + const Timeout &timeout, + bool run_in_shell = true, bool hide_stderr = false); static bool OpenFileInExternalEditor(const FileSpec &file_spec, diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 9335f73b37df1..64b49ecca6061 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -621,7 +621,18 @@ class Platform : public PluginInterface { } virtual lldb_private::Status RunShellCommand( - const char *command, // Shouldn't be nullptr + llvm::StringRef command, + const FileSpec &working_dir, // Pass empty FileSpec to use the current + // working directory + int *status_ptr, // Pass nullptr if you don't want the process exit status + int *signo_ptr, // Pass nullptr if you don't want the signal that caused + // the process to exit + std::string + *command_output, // Pass nullptr if you don't want the command output + const Timeout &timeout); + + virtual lldb_private::Status RunShellCommand( + llvm::StringRef shell, llvm::StringRef command, const FileSpec &working_dir, // Pass empty FileSpec to use the current // working directory int *status_ptr, // Pass nullptr if you don't want the process exit status diff --git a/lldb/include/lldb/Target/RemoteAwarePlatform.h b/lldb/include/lldb/Target/RemoteAwarePlatform.h index 
5741dbe027b70..6d6ac99c093fa 100644 --- a/lldb/include/lldb/Target/RemoteAwarePlatform.h +++ b/lldb/include/lldb/Target/RemoteAwarePlatform.h @@ -68,11 +68,16 @@ class RemoteAwarePlatform : public Platform { bool GetRemoteOSKernelDescription(std::string &s) override; ArchSpec GetRemoteSystemArchitecture() override; - Status RunShellCommand(const char *command, const FileSpec &working_dir, + Status RunShellCommand(llvm::StringRef command, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout) override; + Status RunShellCommand(llvm::StringRef interpreter, llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output, + const Timeout &timeout) override; + const char *GetHostname() override; UserIDResolver &GetUserIDResolver() override; lldb_private::Environment GetEnvironment() override; diff --git a/lldb/include/lldb/Utility/Reproducer.h b/lldb/include/lldb/Utility/Reproducer.h index d6cde44850901..7e5591493d71e 100644 --- a/lldb/include/lldb/Utility/Reproducer.h +++ b/lldb/include/lldb/Utility/Reproducer.h @@ -227,6 +227,22 @@ class Reproducer { mutable std::mutex m_mutex; }; +class Verifier { +public: + Verifier(Loader *loader) : m_loader(loader) {} + void Verify(llvm::function_ref error_callback, + llvm::function_ref warning_callback, + llvm::function_ref note_callback) const; + +private: + Loader *m_loader; +}; + +struct ReplayOptions { + bool verify = true; + bool check_version = true; +}; + } // namespace repro } // namespace lldb_private diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index dacd5ed734b50..73faa2aef5e4b 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -179,12 +179,12 @@ def CMD_MSG(str): - '''A generic "Command '%s' returns successfully" message generator.''' - return "Command '%s' returns 
successfully" % str + '''A generic "Command '%s' did not return successfully" message generator.''' + return "Command '%s' did not return successfully" % str def COMPLETION_MSG(str_before, str_after, completions): - '''A generic message generator for the completion mechanism.''' + '''A generic assertion failed message generator for the completion mechanism.''' return ("'%s' successfully completes to '%s', but completions were:\n%s" % (str_before, str_after, "\n".join(completions))) @@ -198,8 +198,8 @@ def EXP_MSG(str, actual, exe): def SETTING_MSG(setting): - '''A generic "Value of setting '%s' is correct" message generator.''' - return "Value of setting '%s' is correct" % setting + '''A generic "Value of setting '%s' is not correct" message generator.''' + return "Value of setting '%s' is not correct" % setting def line_number(filename, string_to_match): @@ -1293,6 +1293,29 @@ def isPPC64le(self): return True return False + def isAArch64SVE(self): + triple = self.dbg.GetSelectedPlatform().GetTriple() + + # TODO other platforms, please implement this function + if not re.match(".*-.*-linux", triple): + return False + + # Need to do something different for non-Linux/Android targets + cpuinfo_path = self.getBuildArtifact("cpuinfo") + if configuration.lldb_platform_name: + self.runCmd('platform get-file "/proc/cpuinfo" ' + cpuinfo_path) + else: + cpuinfo_path = "/proc/cpuinfo" + + try: + f = open(cpuinfo_path, 'r') + cpuinfo = f.read() + f.close() + except: + return False + + return " sve " in cpuinfo + def getArchitecture(self): """Returns the architecture in effect the test suite is running with.""" module = builder_module() @@ -2433,58 +2456,76 @@ def expect( with recording(self, trace) as sbuf: print("looking at:", output, file=sbuf) - # The heading says either "Expecting" or "Not expecting". 
- heading = "Expecting" if matching else "Not expecting" + expecting_str = "Expecting" if matching else "Not expecting" + def found_str(matched): + return "was found" if matched else "was not found" - # Start from the startstr, if specified. - # If there's no startstr, set the initial state appropriately. - matched = output.startswith(startstr) if startstr else ( - True if matching else False) + # To be used as assert fail message and/or trace content + log_lines = [ + "{}:".format("Ran command" if exe else "Checking string"), + "\"{}\"".format(str), + # Space out command and output + "", + ] + if exe: + # Newline before output to make large strings more readable + log_lines.append("Got output:\n{}".format(output)) + # Assume that we start matched if we want a match + # Meaning if you have no conditions, matching or + # not matching will always pass + matched = matching + + # We will stop checking on first failure if startstr: - with recording(self, trace) as sbuf: - print("%s start string: %s" % (heading, startstr), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) + matched = output.startswith(startstr) + log_lines.append("{} start string: \"{}\" ({})".format( + expecting_str, startstr, found_str(matched))) - # Look for endstr, if specified. - keepgoing = matched if matching else not matched - if endstr: + if endstr and matched == matching: matched = output.endswith(endstr) - with recording(self, trace) as sbuf: - print("%s end string: %s" % (heading, endstr), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) + log_lines.append("{} end string: \"{}\" ({})".format( + expecting_str, endstr, found_str(matched))) - # Look for sub strings, if specified. 
- keepgoing = matched if matching else not matched - if substrs and keepgoing: + if substrs and matched == matching: start = 0 for substr in substrs: index = output[start:].find(substr) start = start + index if ordered and matching else 0 matched = index != -1 - with recording(self, trace) as sbuf: - print("%s sub string: %s" % (heading, substr), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) - keepgoing = matched if matching else not matched - if not keepgoing: + log_lines.append("{} sub string: \"{}\" ({})".format( + expecting_str, substr, found_str(matched))) + + if matched != matching: break - # Search for regular expression patterns, if specified. - keepgoing = matched if matching else not matched - if patterns and keepgoing: + if patterns and matched == matching: for pattern in patterns: - # Match Objects always have a boolean value of True. - matched = bool(re.search(pattern, output)) - with recording(self, trace) as sbuf: - print("%s pattern: %s" % (heading, pattern), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) - keepgoing = matched if matching else not matched - if not keepgoing: + matched = re.search(pattern, output) + + pattern_line = "{} regex pattern: \"{}\" ({}".format( + expecting_str, pattern, found_str(matched)) + if matched: + pattern_line += ", matched \"{}\"".format( + matched.group(0)) + pattern_line += ")" + log_lines.append(pattern_line) + + # Convert to bool because match objects + # are True-ish but != True itself + matched = bool(matched) + if matched != matching: break - self.assertTrue(matched if matching else not matched, - msg + "\nCommand output:\n" + EXP_MSG(str, output, exe) - if msg else EXP_MSG(str, output, exe)) + # If a check failed, add any extra assert message + if msg is not None and matched != matching: + log_lines.append(msg) + + log_msg = "\n".join(log_lines) + with recording(self, trace) as sbuf: + print(log_msg, file=sbuf) + if matched != matching: + self.fail(log_msg) 
def expect_expr( self, diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index 7ac852488ffbb..3c6422e211fca 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -50,14 +50,25 @@ struct PlatformConnectOptions { // PlatformShellCommand struct PlatformShellCommand { - PlatformShellCommand(const char *shell_command = nullptr) + PlatformShellCommand(llvm::StringRef shell_interpreter, + llvm::StringRef shell_command) : m_command(), m_working_dir(), m_status(0), m_signo(0) { - if (shell_command && shell_command[0]) - m_command = shell_command; + if (!shell_interpreter.empty()) + m_shell = shell_interpreter.str(); + + if (!m_shell.empty() && !shell_command.empty()) + m_command = shell_command.str(); + } + + PlatformShellCommand(llvm::StringRef shell_command = llvm::StringRef()) + : m_shell(), m_command(), m_working_dir(), m_status(0), m_signo(0) { + if (!shell_command.empty()) + m_command = shell_command.str(); } ~PlatformShellCommand() = default; + std::string m_shell; std::string m_command; std::string m_working_dir; std::string m_output; @@ -163,6 +174,13 @@ void SBPlatformConnectOptions::SetLocalCacheDirectory(const char *path) { } // SBPlatformShellCommand +SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_interpreter, + const char *shell_command) + : m_opaque_ptr(new PlatformShellCommand(shell_interpreter, shell_command)) { + LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *, const char *), + shell_interpreter, shell_command); +} + SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_command) : m_opaque_ptr(new PlatformShellCommand(shell_command)) { LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *), @@ -200,6 +218,24 @@ void SBPlatformShellCommand::Clear() { m_opaque_ptr->m_signo = 0; } +const char *SBPlatformShellCommand::GetShell() { + LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetShell); + + if (m_opaque_ptr->m_shell.empty()) + return 
nullptr; + return m_opaque_ptr->m_shell.c_str(); +} + +void SBPlatformShellCommand::SetShell(const char *shell_interpreter) { + LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetShell, (const char *), + shell_interpreter); + + if (shell_interpreter && shell_interpreter[0]) + m_opaque_ptr->m_shell = shell_interpreter; + else + m_opaque_ptr->m_shell.clear(); +} + const char *SBPlatformShellCommand::GetCommand() { LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetCommand); @@ -557,7 +593,8 @@ SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) { if (working_dir) shell_command.SetWorkingDirectory(working_dir); } - return platform_sp->RunShellCommand(command, FileSpec(working_dir), + return platform_sp->RunShellCommand(shell_command.m_opaque_ptr->m_shell, + command, FileSpec(working_dir), &shell_command.m_opaque_ptr->m_status, &shell_command.m_opaque_ptr->m_signo, &shell_command.m_opaque_ptr->m_output, @@ -699,6 +736,8 @@ void RegisterMethods(Registry &R) { SBPlatformShellCommand &, SBPlatformShellCommand, operator=,(const lldb::SBPlatformShellCommand &)); LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, Clear, ()); + LLDB_REGISTER_METHOD(const char *, SBPlatformShellCommand, GetShell, ()); + LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, SetShell, (const char *)); LLDB_REGISTER_METHOD(const char *, SBPlatformShellCommand, GetCommand, ()); LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, SetCommand, (const char *)); diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp index 7d08a88fe9e30..ec1c85d243294 100644 --- a/lldb/source/API/SBReproducer.cpp +++ b/lldb/source/API/SBReproducer.cpp @@ -30,6 +30,33 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::repro; +SBReplayOptions::SBReplayOptions() + : m_opaque_up(std::make_unique()){} + +SBReplayOptions::SBReplayOptions(const SBReplayOptions &rhs) + : m_opaque_up(std::make_unique(*rhs.m_opaque_up)) {} + 
+SBReplayOptions::~SBReplayOptions() = default; + +SBReplayOptions &SBReplayOptions::operator=(const SBReplayOptions &rhs) { + if (this == &rhs) + return *this; + *m_opaque_up = *rhs.m_opaque_up; + return *this; +} + +void SBReplayOptions::SetVerify(bool verify) { m_opaque_up->verify = verify; } + +bool SBReplayOptions::GetVerify() const { return m_opaque_up->verify; } + +void SBReplayOptions::SetCheckVersion(bool check) { + m_opaque_up->check_version = check; +} + +bool SBReplayOptions::GetCheckVersion() const { + return m_opaque_up->check_version; +} + SBRegistry::SBRegistry() { Registry &R = *this; @@ -163,10 +190,18 @@ const char *SBReproducer::PassiveReplay(const char *path) { } const char *SBReproducer::Replay(const char *path) { - return SBReproducer::Replay(path, false); + SBReplayOptions options; + return SBReproducer::Replay(path, options); } const char *SBReproducer::Replay(const char *path, bool skip_version_check) { + SBReplayOptions options; + options.SetCheckVersion(!skip_version_check); + return SBReproducer::Replay(path, options); +} + +const char *SBReproducer::Replay(const char *path, + const SBReplayOptions &options) { static std::string error; if (auto e = Reproducer::Initialize(ReproducerMode::Replay, FileSpec(path))) { error = llvm::toString(std::move(e)); @@ -179,7 +214,7 @@ const char *SBReproducer::Replay(const char *path, bool skip_version_check) { return error.c_str(); } - if (!skip_version_check) { + if (options.GetCheckVersion()) { llvm::Expected version = loader->LoadBuffer(); if (!version) { error = llvm::toString(version.takeError()); @@ -195,6 +230,30 @@ const char *SBReproducer::Replay(const char *path, bool skip_version_check) { } } + if (options.GetVerify()) { + bool verification_failed = false; + llvm::raw_string_ostream os(error); + auto error_callback = [&](llvm::StringRef error) { + verification_failed = true; + os << "\nerror: " << error; + }; + + auto warning_callback = [&](llvm::StringRef warning) { + verification_failed 
= true; + os << "\nwarning: " << warning; + }; + + auto note_callback = [&](llvm::StringRef warning) {}; + + Verifier verifier(loader); + verifier.Verify(error_callback, warning_callback, note_callback); + + if (verification_failed) { + os.flush(); + return error.c_str(); + } + } + FileSpec file = loader->GetFile(); if (!file) { error = "unable to get replay data from reproducer."; diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp index b5409e611f058..3a5af9f91cf16 100644 --- a/lldb/source/Commands/CommandObjectPlatform.cpp +++ b/lldb/source/Commands/CommandObjectPlatform.cpp @@ -1611,6 +1611,16 @@ class CommandObjectPlatformShell : public CommandObjectRaw { else m_timeout = std::chrono::seconds(timeout_sec); break; + case 's': { + if (option_arg.empty()) { + error.SetErrorStringWithFormat( + "missing shell interpreter path for option -s|--shell."); + return error; + } + + m_shell_interpreter = option_arg.str(); + break; + } default: llvm_unreachable("Unimplemented option"); } @@ -1621,10 +1631,12 @@ class CommandObjectPlatformShell : public CommandObjectRaw { void OptionParsingStarting(ExecutionContext *execution_context) override { m_timeout.reset(); m_use_host_platform = false; + m_shell_interpreter.clear(); } Timeout m_timeout = std::chrono::seconds(10); bool m_use_host_platform; + std::string m_shell_interpreter; }; CommandObjectPlatformShell(CommandInterpreter &interpreter) @@ -1650,7 +1662,6 @@ class CommandObjectPlatformShell : public CommandObjectRaw { const bool is_alias = !raw_command_line.contains("platform"); OptionsWithRaw args(raw_command_line); - const char *expr = args.GetRawPart().c_str(); if (args.HasArgs()) if (!ParseOptions(args.GetArgs(), result)) @@ -1662,6 +1673,8 @@ class CommandObjectPlatformShell : public CommandObjectRaw { return false; } + llvm::StringRef cmd = args.GetRawPart(); + PlatformSP platform_sp( m_options.m_use_host_platform ?
Platform::GetHostPlatform() @@ -1672,7 +1685,8 @@ class CommandObjectPlatformShell : public CommandObjectRaw { std::string output; int status = -1; int signo = -1; - error = (platform_sp->RunShellCommand(expr, working_dir, &status, &signo, + error = (platform_sp->RunShellCommand(m_options.m_shell_interpreter, cmd, + working_dir, &status, &signo, &output, m_options.m_timeout)); if (!output.empty()) result.GetOutputStream().PutCString(output); diff --git a/lldb/source/Commands/CommandObjectReproducer.cpp b/lldb/source/Commands/CommandObjectReproducer.cpp index da2d9ca5a901a..ae4894009054b 100644 --- a/lldb/source/Commands/CommandObjectReproducer.cpp +++ b/lldb/source/Commands/CommandObjectReproducer.cpp @@ -116,6 +116,9 @@ static constexpr OptionEnumValues ReproducerSignalType() { #define LLDB_OPTIONS_reproducer_xcrash #include "CommandOptions.inc" +#define LLDB_OPTIONS_reproducer_verify +#include "CommandOptions.inc" + template llvm::Expected static ReadFromYAML(StringRef filename) { auto error_or_file = MemoryBuffer::getFile(filename); @@ -134,6 +137,38 @@ llvm::Expected static ReadFromYAML(StringRef filename) { return t; } +static void SetError(CommandReturnObject &result, Error err) { + result.GetErrorStream().Printf("error: %s\n", + toString(std::move(err)).c_str()); + result.SetStatus(eReturnStatusFailed); +} + +/// Create a loader from the given path if specified. Otherwise use the current +/// loader used for replay. +static Loader * +GetLoaderFromPathOrCurrent(llvm::Optional &loader_storage, + CommandReturnObject &result, + FileSpec reproducer_path) { + if (reproducer_path) { + loader_storage.emplace(reproducer_path); + Loader *loader = &(*loader_storage); + if (Error err = loader->LoadIndex()) { + // This is a hard error and will set the result to eReturnStatusFailed. 
+ SetError(result, std::move(err)); + return nullptr; + } + return loader; + } + + if (Loader *loader = Reproducer::Instance().GetLoader()) + return loader; + + // This is a soft error because this is expected to fail during capture. + result.SetError("Not specifying a reproducer is only supported during replay."); + result.SetStatus(eReturnStatusSuccessFinishNoResult); + return nullptr; +} + class CommandObjectReproducerGenerate : public CommandObjectParsed { public: CommandObjectReproducerGenerate(CommandInterpreter &interpreter) @@ -312,12 +347,6 @@ class CommandObjectReproducerStatus : public CommandObjectParsed { } }; -static void SetError(CommandReturnObject &result, Error err) { - result.GetErrorStream().Printf("error: %s\n", - toString(std::move(err)).c_str()); - result.SetStatus(eReturnStatusFailed); -} - class CommandObjectReproducerDump : public CommandObjectParsed { public: CommandObjectReproducerDump(CommandInterpreter &interpreter) @@ -382,29 +411,11 @@ class CommandObjectReproducerDump : public CommandObjectParsed { return false; } - // If no reproducer path is specified, use the loader currently used for - // replay. Otherwise create a new loader just for dumping. llvm::Optional loader_storage; - Loader *loader = nullptr; - if (!m_options.file) { - loader = Reproducer::Instance().GetLoader(); - if (loader == nullptr) { - result.SetError( - "Not specifying a reproducer is only support during replay."); - result.SetStatus(eReturnStatusSuccessFinishNoResult); - return false; - } - } else { - loader_storage.emplace(m_options.file); - loader = &(*loader_storage); - if (Error err = loader->LoadIndex()) { - SetError(result, std::move(err)); - return false; - } - } - - // If we get here we should have a valid loader.
- assert(loader); + Loader *loader = + GetLoaderFromPathOrCurrent(loader_storage, result, m_options.file); + if (!loader) + return false; switch (m_options.provider) { case eReproducerProviderFiles: { @@ -583,6 +594,101 @@ class CommandObjectReproducerDump : public CommandObjectParsed { CommandOptions m_options; }; +class CommandObjectReproducerVerify : public CommandObjectParsed { +public: + CommandObjectReproducerVerify(CommandInterpreter &interpreter) + : CommandObjectParsed(interpreter, "reproducer verify", + "Verify the contents of a reproducer. " + "If no reproducer is specified during replay, it " + "verifies the content of the current reproducer.", + nullptr) {} + + ~CommandObjectReproducerVerify() override = default; + + Options *GetOptions() override { return &m_options; } + + class CommandOptions : public Options { + public: + CommandOptions() : Options(), file() {} + + ~CommandOptions() override = default; + + Status SetOptionValue(uint32_t option_idx, StringRef option_arg, + ExecutionContext *execution_context) override { + Status error; + const int short_option = m_getopt_table[option_idx].val; + + switch (short_option) { + case 'f': + file.SetFile(option_arg, FileSpec::Style::native); + FileSystem::Instance().Resolve(file); + break; + default: + llvm_unreachable("Unimplemented option"); + } + + return error; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + file.Clear(); + } + + ArrayRef GetDefinitions() override { + return makeArrayRef(g_reproducer_verify_options); + } + + FileSpec file; + }; + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + if (!command.empty()) { + result.AppendErrorWithFormat("'%s' takes no arguments", + m_cmd_name.c_str()); + return false; + } + + llvm::Optional loader_storage; + Loader *loader = + GetLoaderFromPathOrCurrent(loader_storage, result, m_options.file); + if (!loader) + return false; + + bool errors = false; + auto error_callback = 
[&](llvm::StringRef error) { + errors = true; + result.AppendError(error); + }; + + bool warnings = false; + auto warning_callback = [&](llvm::StringRef warning) { + warnings = true; + result.AppendWarning(warning); + }; + + auto note_callback = [&](llvm::StringRef warning) { + result.AppendMessage(warning); + }; + + Verifier verifier(loader); + verifier.Verify(error_callback, warning_callback, note_callback); + + if (warnings || errors) { + result.AppendMessage("reproducer verification failed"); + result.SetStatus(eReturnStatusFailed); + } else { + result.AppendMessage("reproducer verification succeeded"); + result.SetStatus(eReturnStatusSuccessFinishResult); + } + + return result.Succeeded(); + } + +private: + CommandOptions m_options; +}; + CommandObjectReproducer::CommandObjectReproducer( CommandInterpreter &interpreter) : CommandObjectMultiword( @@ -605,6 +711,8 @@ CommandObjectReproducer::CommandObjectReproducer( new CommandObjectReproducerStatus(interpreter))); LoadSubCommand("dump", CommandObjectSP(new CommandObjectReproducerDump(interpreter))); + LoadSubCommand("verify", CommandObjectSP( + new CommandObjectReproducerVerify(interpreter))); LoadSubCommand("xcrash", CommandObjectSP( new CommandObjectReproducerXCrash(interpreter))); } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index fbb64957f48d3..eacd6de1910c1 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -451,6 +451,12 @@ let Command = "reproducer dump" in { "provided, that reproducer is dumped.">; } +let Command = "reproducer verify" in { + def reproducer_verify_file : Option<"file", "f">, Group<1>, Arg<"Filename">, + Desc<"The reproducer path. 
If a reproducer is replayed and no path is " + "provided, that reproducer is verified.">; +} + let Command = "reproducer xcrash" in { def reproducer_signal : Option<"signal", "s">, Group<1>, EnumArg<"None", "ReproducerSignalType()">, @@ -631,6 +637,8 @@ let Command = "platform shell" in { Desc<"Run the commands on the host shell when enabled.">; def platform_shell_timeout : Option<"timeout", "t">, Arg<"Value">, Desc<"Seconds to wait for the remote host to finish running the command.">; + def platform_shell_interpreter : Option<"shell", "s">, Arg<"Path">, + Desc<"Shell interpreter path. This is the binary used to run the command.">; } let Command = "process attach" in { diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 10aff7a6c2175..19066e6be6232 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1497,19 +1497,18 @@ struct Row { ValueObjectManager value; Row *parent; // The process stop ID when the children were calculated. - uint32_t children_stop_id; - int row_idx; - int x; - int y; + uint32_t children_stop_id = 0; + int row_idx = 0; + int x = 1; + int y = 1; bool might_have_children; - bool expanded; - bool calculated_children; + bool expanded = false; + bool calculated_children = false; std::vector children; Row(const ValueObjectSP &v, Row *p) - : value(v, lldb::eDynamicDontRunTarget, true), parent(p), row_idx(0), - x(1), y(1), might_have_children(v ? v->MightHaveChildren() : false), - expanded(false), calculated_children(false), children() {} + : value(v, lldb::eDynamicDontRunTarget, true), parent(p), + might_have_children(v ?
v->MightHaveChildren() : false) {} size_t GetDepth() const { if (parent) diff --git a/lldb/source/Host/common/Host.cpp b/lldb/source/Host/common/Host.cpp index 71c2983ab00f3..958fca07850bf 100644 --- a/lldb/source/Host/common/Host.cpp +++ b/lldb/source/Host/common/Host.cpp @@ -467,14 +467,24 @@ MonitorShellCommand(std::shared_ptr shell_info, lldb::pid_t pid, return true; } -Status Host::RunShellCommand(const char *command, const FileSpec &working_dir, - int *status_ptr, int *signo_ptr, - std::string *command_output_ptr, +Status Host::RunShellCommand(llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output_ptr, + const Timeout &timeout, + bool run_in_shell, bool hide_stderr) { + return RunShellCommand(llvm::StringRef(), Args(command), working_dir, + status_ptr, signo_ptr, command_output_ptr, timeout, + run_in_shell, hide_stderr); +} + +Status Host::RunShellCommand(llvm::StringRef shell_path, + llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output_ptr, const Timeout &timeout, - bool run_in_default_shell, - bool hide_stderr) { - return RunShellCommand(Args(command), working_dir, status_ptr, signo_ptr, - command_output_ptr, timeout, run_in_default_shell, + bool run_in_shell, bool hide_stderr) { + return RunShellCommand(shell_path, Args(command), working_dir, status_ptr, + signo_ptr, command_output_ptr, timeout, run_in_shell, hide_stderr); } @@ -482,14 +492,27 @@ Status Host::RunShellCommand(const Args &args, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output_ptr, const Timeout &timeout, - bool run_in_default_shell, - bool hide_stderr) { + bool run_in_shell, bool hide_stderr) { + return RunShellCommand(llvm::StringRef(), args, working_dir, status_ptr, + signo_ptr, command_output_ptr, timeout, run_in_shell, + hide_stderr); +} + +Status Host::RunShellCommand(llvm::StringRef shell_path, const Args &args, + 
const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output_ptr, + const Timeout &timeout, + bool run_in_shell, bool hide_stderr) { Status error; ProcessLaunchInfo launch_info; launch_info.SetArchitecture(HostInfo::GetArchitecture()); - if (run_in_default_shell) { + if (run_in_shell) { // Run the command in a shell - launch_info.SetShell(HostInfo::GetDefaultShell()); + FileSpec shell = HostInfo::GetDefaultShell(); + if (!shell_path.empty()) + shell.SetPath(shell_path); + + launch_info.SetShell(shell); launch_info.GetArguments().AppendArguments(args); const bool localhost = true; const bool will_debug = false; diff --git a/lldb/source/Host/linux/Host.cpp b/lldb/source/Host/linux/Host.cpp index 45973f5d214b2..520a00df35f6d 100644 --- a/lldb/source/Host/linux/Host.cpp +++ b/lldb/source/Host/linux/Host.cpp @@ -16,6 +16,7 @@ #include #include +#include "llvm/ADT/StringSwitch.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ScopedPrinter.h" @@ -35,8 +36,11 @@ using namespace lldb_private; namespace { enum class ProcessState { Unknown, + Dead, DiskSleep, + Idle, Paging, + Parked, Running, Sleeping, TracedOrStopped, @@ -50,12 +54,14 @@ class ProcessLaunchInfo; static bool GetStatusInfo(::pid_t Pid, ProcessInstanceInfo &ProcessInfo, ProcessState &State, ::pid_t &TracerPid) { + Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_HOST); + auto BufferOrError = getProcFile(Pid, "status"); if (!BufferOrError) return false; llvm::StringRef Rest = BufferOrError.get()->getBuffer(); - while(!Rest.empty()) { + while (!Rest.empty()) { llvm::StringRef Line; std::tie(Line, Rest) = Rest.split('\n'); @@ -84,26 +90,19 @@ static bool GetStatusInfo(::pid_t Pid, ProcessInstanceInfo &ProcessInfo, Line.ltrim().consumeInteger(10, PPid); ProcessInfo.SetParentProcessID(PPid); } else if (Line.consume_front("State:")) { - char S = Line.ltrim().front(); - switch (S) { - case 'R': - State = ProcessState::Running; - break; - case 'S': - State = ProcessState::Sleeping; 
- break; - case 'D': - State = ProcessState::DiskSleep; - break; - case 'Z': - State = ProcessState::Zombie; - break; - case 'T': - State = ProcessState::TracedOrStopped; - break; - case 'W': - State = ProcessState::Paging; - break; + State = llvm::StringSwitch(Line.ltrim().take_front(1)) + .Case("D", ProcessState::DiskSleep) + .Case("I", ProcessState::Idle) + .Case("R", ProcessState::Running) + .Case("S", ProcessState::Sleeping) + .CaseLower("T", ProcessState::TracedOrStopped) + .Case("W", ProcessState::Paging) + .Case("P", ProcessState::Parked) + .Case("X", ProcessState::Dead) + .Case("Z", ProcessState::Zombie) + .Default(ProcessState::Unknown); + if (State == ProcessState::Unknown) { + LLDB_LOG(log, "Unknown process state {0}", Line); } } else if (Line.consume_front("TracerPid:")) { Line = Line.ltrim(); diff --git a/lldb/source/Host/macosx/objcxx/CMakeLists.txt b/lldb/source/Host/macosx/objcxx/CMakeLists.txt index e55b094c0c305..9db24f3064185 100644 --- a/lldb/source/Host/macosx/objcxx/CMakeLists.txt +++ b/lldb/source/Host/macosx/objcxx/CMakeLists.txt @@ -14,3 +14,5 @@ add_lldb_library(lldbHostMacOSXObjCXX LINK_COMPONENTS Support ) + +target_compile_options(lldbHostMacOSXObjCXX PRIVATE -fno-objc-exceptions) diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index faac6f59190af..8cd3b35919936 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -1323,11 +1323,11 @@ static bool ShouldLaunchUsingXPC(ProcessLaunchInfo &launch_info) { launch_info.SetWorkingDirectory(working_dir); } } - bool run_in_default_shell = true; + bool run_in_shell = true; bool hide_stderr = true; - Status e = RunShellCommand(expand_command, cwd, &status, nullptr, &output, - std::chrono::seconds(10), run_in_default_shell, - hide_stderr); + Status e = + RunShellCommand(expand_command, cwd, &status, nullptr, &output, + std::chrono::seconds(10), run_in_shell, hide_stderr); if (e.Fail()) return e; diff --git 
a/lldb/source/Initialization/SystemInitializerCommon.cpp b/lldb/source/Initialization/SystemInitializerCommon.cpp index d352173e11588..b29138c4884f6 100644 --- a/lldb/source/Initialization/SystemInitializerCommon.cpp +++ b/lldb/source/Initialization/SystemInitializerCommon.cpp @@ -79,9 +79,10 @@ static llvm::Error InitializeFileSystem() { repro::FileProvider &fp = g->GetOrCreate(); FileSystem::Initialize(fp.GetFileCollector()); - repro::WorkingDirectoryProvider &wp = - g->GetOrCreate(); - fp.RecordInterestingDirectory(wp.GetDirectory()); + fp.RecordInterestingDirectory( + g->GetOrCreate().GetDirectory()); + fp.RecordInterestingDirectory( + g->GetOrCreate().GetDirectory()); return llvm::Error::success(); } diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 8c77227d01f2a..1f67468000976 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2091,9 +2091,12 @@ static void GetHomeInitFile(llvm::SmallVectorImpl &init_file, FileSystem::Instance().Resolve(init_file); } -static void GetHomeREPLInitFile(llvm::SmallVectorImpl &init_file, - LanguageType language) { - if (language == LanguageType::eLanguageTypeUnknown) +static void GetHomeREPLInitFile(llvm::SmallVectorImpl &init_file) { + LanguageSet repl_languages = Language::GetLanguagesSupportingREPLs(); + LanguageType language = eLanguageTypeUnknown; + if (auto main_repl_language = repl_languages.GetSingularLanguage()) + language = *main_repl_language; + else return; std::string init_file_name = @@ -2191,13 +2194,8 @@ void CommandInterpreter::SourceInitFileHome(CommandReturnObject &result, llvm::SmallString<128> init_file; - if (is_repl) { - LanguageType language = {}; - TargetSP target_sp = GetDebugger().GetSelectedTarget(); - if (target_sp) - language = target_sp->GetLanguage(); - GetHomeREPLInitFile(init_file, language); - } + if (is_repl) + GetHomeREPLInitFile(init_file); if (init_file.empty()) 
GetHomeInitFile(init_file); diff --git a/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt b/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt index 946ff0a64c26f..7d094a5865c43 100644 --- a/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt +++ b/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt @@ -15,3 +15,6 @@ add_lldb_library(lldbPluginPlatformMacOSXObjCXX Object Support ) + + +target_compile_options(lldbPluginPlatformMacOSXObjCXX PRIVATE -fno-objc-exceptions) diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index e1eb15c3e8c92..0e0b61f1534f7 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -711,7 +711,7 @@ bool PlatformRemoteGDBServer::GetFileExists(const FileSpec &file_spec) { } Status PlatformRemoteGDBServer::RunShellCommand( - const char *command, // Shouldn't be NULL + llvm::StringRef shell, llvm::StringRef command, const FileSpec & working_dir, // Pass empty FileSpec to use the current working directory int *status_ptr, // Pass NULL if you don't want the process exit status diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h index 3562b2bb09dfc..297b482eb87ad 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h @@ -140,7 +140,7 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver { Status Unlink(const FileSpec &path) override; Status RunShellCommand( - const char *command, // Shouldn't be NULL + llvm::StringRef shell, llvm::StringRef command, const FileSpec &working_dir, // Pass empty FileSpec to use the current // working directory int *status_ptr, // Pass NULL if you don't want the process exit status 
diff --git a/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp b/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp index 48dbddb86cca3..3accc9cef6edb 100644 --- a/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp @@ -164,9 +164,7 @@ lldb::RegisterContextSP FreeBSDThread::GetRegisterContext() { assert(target_arch.GetTriple().getOS() == llvm::Triple::FreeBSD); switch (target_arch.GetMachine()) { case llvm::Triple::aarch64: - break; case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(target_arch); break; case llvm::Triple::ppc: #ifndef __powerpc64__ @@ -200,7 +198,8 @@ lldb::RegisterContextSP FreeBSDThread::GetRegisterContext() { } case llvm::Triple::arm: { RegisterContextPOSIXProcessMonitor_arm *reg_ctx = - new RegisterContextPOSIXProcessMonitor_arm(*this, 0, reg_interface); + new RegisterContextPOSIXProcessMonitor_arm( + *this, std::make_unique(target_arch)); m_posix_thread = reg_ctx; m_reg_context_sp.reset(reg_ctx); break; diff --git a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp index 4216f68faf5c6..2f4d613f767af 100644 --- a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp @@ -21,9 +21,9 @@ using namespace lldb; #define REG_CONTEXT_SIZE (GetGPRSize()) RegisterContextPOSIXProcessMonitor_arm::RegisterContextPOSIXProcessMonitor_arm( - Thread &thread, uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info) - : RegisterContextPOSIX_arm(thread, concrete_frame_idx, register_info) {} + lldb_private::Thread &thread, + std::unique_ptr register_info) + : RegisterContextPOSIX_arm(thread, std::move(register_info)) {} ProcessMonitor &RegisterContextPOSIXProcessMonitor_arm::GetMonitor() { ProcessSP base = CalculateProcess(); diff --git 
a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h index b376967df99ce..12e1f19d32fac 100644 --- a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h +++ b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h @@ -16,8 +16,8 @@ class RegisterContextPOSIXProcessMonitor_arm : public RegisterContextPOSIX_arm, public POSIXBreakpointProtocol { public: RegisterContextPOSIXProcessMonitor_arm( - lldb_private::Thread &thread, uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info); + lldb_private::Thread &thread, + std::unique_ptr register_info); protected: bool ReadGPR(); diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp index a83491e6d8987..04714ec3c3749 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp @@ -43,55 +43,6 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::process_linux; -// arm general purpose registers. -static const uint32_t g_gpr_regnums_arm[] = { - gpr_r0_arm, gpr_r1_arm, gpr_r2_arm, gpr_r3_arm, gpr_r4_arm, - gpr_r5_arm, gpr_r6_arm, gpr_r7_arm, gpr_r8_arm, gpr_r9_arm, - gpr_r10_arm, gpr_r11_arm, gpr_r12_arm, gpr_sp_arm, gpr_lr_arm, - gpr_pc_arm, gpr_cpsr_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag -}; -static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == - k_num_gpr_registers_arm, - "g_gpr_regnums_arm has wrong number of register infos"); - -// arm floating point registers. 
-static const uint32_t g_fpu_regnums_arm[] = { - fpu_s0_arm, fpu_s1_arm, fpu_s2_arm, fpu_s3_arm, fpu_s4_arm, - fpu_s5_arm, fpu_s6_arm, fpu_s7_arm, fpu_s8_arm, fpu_s9_arm, - fpu_s10_arm, fpu_s11_arm, fpu_s12_arm, fpu_s13_arm, fpu_s14_arm, - fpu_s15_arm, fpu_s16_arm, fpu_s17_arm, fpu_s18_arm, fpu_s19_arm, - fpu_s20_arm, fpu_s21_arm, fpu_s22_arm, fpu_s23_arm, fpu_s24_arm, - fpu_s25_arm, fpu_s26_arm, fpu_s27_arm, fpu_s28_arm, fpu_s29_arm, - fpu_s30_arm, fpu_s31_arm, fpu_fpscr_arm, fpu_d0_arm, fpu_d1_arm, - fpu_d2_arm, fpu_d3_arm, fpu_d4_arm, fpu_d5_arm, fpu_d6_arm, - fpu_d7_arm, fpu_d8_arm, fpu_d9_arm, fpu_d10_arm, fpu_d11_arm, - fpu_d12_arm, fpu_d13_arm, fpu_d14_arm, fpu_d15_arm, fpu_d16_arm, - fpu_d17_arm, fpu_d18_arm, fpu_d19_arm, fpu_d20_arm, fpu_d21_arm, - fpu_d22_arm, fpu_d23_arm, fpu_d24_arm, fpu_d25_arm, fpu_d26_arm, - fpu_d27_arm, fpu_d28_arm, fpu_d29_arm, fpu_d30_arm, fpu_d31_arm, - fpu_q0_arm, fpu_q1_arm, fpu_q2_arm, fpu_q3_arm, fpu_q4_arm, - fpu_q5_arm, fpu_q6_arm, fpu_q7_arm, fpu_q8_arm, fpu_q9_arm, - fpu_q10_arm, fpu_q11_arm, fpu_q12_arm, fpu_q13_arm, fpu_q14_arm, - fpu_q15_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag -}; -static_assert(((sizeof g_fpu_regnums_arm / sizeof g_fpu_regnums_arm[0]) - 1) == - k_num_fpr_registers_arm, - "g_fpu_regnums_arm has wrong number of register infos"); - -namespace { -// Number of register sets provided by this context. -enum { k_num_register_sets = 2 }; -} - -// Register sets for arm. 
-static const RegisterSet g_reg_sets_arm[k_num_register_sets] = { - {"General Purpose Registers", "gpr", k_num_gpr_registers_arm, - g_gpr_regnums_arm}, - {"Floating Point Registers", "fpu", k_num_fpr_registers_arm, - g_fpu_regnums_arm}}; - #if defined(__arm__) std::unique_ptr @@ -107,22 +58,7 @@ NativeRegisterContextLinux_arm::NativeRegisterContextLinux_arm( const ArchSpec &target_arch, NativeThreadProtocol &native_thread) : NativeRegisterContextLinux(native_thread, new RegisterInfoPOSIX_arm(target_arch)) { - switch (target_arch.GetMachine()) { - case llvm::Triple::arm: - m_reg_info.num_registers = k_num_registers_arm; - m_reg_info.num_gpr_registers = k_num_gpr_registers_arm; - m_reg_info.num_fpr_registers = k_num_fpr_registers_arm; - m_reg_info.last_gpr = k_last_gpr_arm; - m_reg_info.first_fpr = k_first_fpr_arm; - m_reg_info.last_fpr = k_last_fpr_arm; - m_reg_info.first_fpr_v = fpu_s0_arm; - m_reg_info.last_fpr_v = fpu_s31_arm; - m_reg_info.gpr_flags = gpr_cpsr_arm; - break; - default: - assert(false && "Unhandled target architecture."); - break; - } + assert(target_arch.GetMachine() == llvm::Triple::arm); ::memset(&m_fpr, 0, sizeof(m_fpr)); ::memset(&m_gpr_arm, 0, sizeof(m_gpr_arm)); @@ -135,23 +71,24 @@ NativeRegisterContextLinux_arm::NativeRegisterContextLinux_arm( m_refresh_hwdebug_info = true; } +RegisterInfoPOSIX_arm &NativeRegisterContextLinux_arm::GetRegisterInfo() const { + return static_cast(*m_register_info_interface_up); +} + uint32_t NativeRegisterContextLinux_arm::GetRegisterSetCount() const { - return k_num_register_sets; + return GetRegisterInfo().GetRegisterSetCount(); } uint32_t NativeRegisterContextLinux_arm::GetUserRegisterCount() const { uint32_t count = 0; - for (uint32_t set_index = 0; set_index < k_num_register_sets; ++set_index) - count += g_reg_sets_arm[set_index].num_registers; + for (uint32_t set_index = 0; set_index < GetRegisterSetCount(); ++set_index) + count += GetRegisterSet(set_index)->num_registers; return count; } const 
RegisterSet * NativeRegisterContextLinux_arm::GetRegisterSet(uint32_t set_index) const { - if (set_index < k_num_register_sets) - return &g_reg_sets_arm[set_index]; - - return nullptr; + return GetRegisterInfo().GetRegisterSet(set_index); } Status @@ -336,11 +273,17 @@ Status NativeRegisterContextLinux_arm::WriteAllRegisterValues( } bool NativeRegisterContextLinux_arm::IsGPR(unsigned reg) const { - return reg <= m_reg_info.last_gpr; // GPR's come first. + if (GetRegisterInfo().GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::GPRegSet) + return true; + return false; } bool NativeRegisterContextLinux_arm::IsFPR(unsigned reg) const { - return (m_reg_info.first_fpr <= reg && reg <= m_reg_info.last_fpr); + if (GetRegisterInfo().GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::FPRegSet) + return true; + return false; } uint32_t NativeRegisterContextLinux_arm::NumSupportedHardwareBreakpoints() { @@ -851,8 +794,7 @@ Status NativeRegisterContextLinux_arm::WriteHardwareDebugRegs(int hwbType, uint32_t NativeRegisterContextLinux_arm::CalculateFprOffset( const RegisterInfo *reg_info) const { - return reg_info->byte_offset - - GetRegisterInfoAtIndex(m_reg_info.first_fpr)->byte_offset; + return reg_info->byte_offset - GetGPRSize(); } Status NativeRegisterContextLinux_arm::DoReadRegisterValue( diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h index 6bd4db573c619..4ce3797e7bdea 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h @@ -12,6 +12,7 @@ #define lldb_NativeRegisterContextLinux_arm_h #include "Plugins/Process/Linux/NativeRegisterContextLinux.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_arm.h" #include "Plugins/Process/Utility/lldb-arm-register-enums.h" namespace lldb_private { @@ -98,37 +99,8 @@ class NativeRegisterContextLinux_arm : 
public NativeRegisterContextLinux { size_t GetFPRSize() override { return sizeof(m_fpr); } private: - struct RegInfo { - uint32_t num_registers; - uint32_t num_gpr_registers; - uint32_t num_fpr_registers; - - uint32_t last_gpr; - uint32_t first_fpr; - uint32_t last_fpr; - - uint32_t first_fpr_v; - uint32_t last_fpr_v; - - uint32_t gpr_flags; - }; - - struct QReg { - uint8_t bytes[16]; - }; - - struct FPU { - union { - uint32_t s[32]; - uint64_t d[32]; - QReg q[16]; // the 128-bit NEON registers - } floats; - uint32_t fpscr; - }; - uint32_t m_gpr_arm[k_num_gpr_registers_arm]; - RegInfo m_reg_info; - FPU m_fpr; + RegisterInfoPOSIX_arm::FPU m_fpr; // Debug register info for hardware breakpoints and watchpoints management. struct DREG { @@ -156,6 +128,8 @@ class NativeRegisterContextLinux_arm : public NativeRegisterContextLinux { Status WriteHardwareDebugRegs(int hwbType, int hwb_index); uint32_t CalculateFprOffset(const RegisterInfo *reg_info) const; + + RegisterInfoPOSIX_arm &GetRegisterInfo() const; }; } // namespace process_linux diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp index 617893b6b3b04..97a760396ba92 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp @@ -25,88 +25,25 @@ using namespace lldb; using namespace lldb_private; -// arm general purpose registers. 
-const uint32_t g_gpr_regnums_arm[] = { - gpr_r0_arm, gpr_r1_arm, gpr_r2_arm, gpr_r3_arm, gpr_r4_arm, - gpr_r5_arm, gpr_r6_arm, gpr_r7_arm, gpr_r8_arm, gpr_r9_arm, - gpr_r10_arm, gpr_r11_arm, gpr_r12_arm, gpr_sp_arm, gpr_lr_arm, - gpr_pc_arm, gpr_cpsr_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag - -}; -static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == - k_num_gpr_registers_arm, - "g_gpr_regnums_arm has wrong number of register infos"); - -// arm floating point registers. -static const uint32_t g_fpu_regnums_arm[] = { - fpu_s0_arm, fpu_s1_arm, fpu_s2_arm, fpu_s3_arm, fpu_s4_arm, - fpu_s5_arm, fpu_s6_arm, fpu_s7_arm, fpu_s8_arm, fpu_s9_arm, - fpu_s10_arm, fpu_s11_arm, fpu_s12_arm, fpu_s13_arm, fpu_s14_arm, - fpu_s15_arm, fpu_s16_arm, fpu_s17_arm, fpu_s18_arm, fpu_s19_arm, - fpu_s20_arm, fpu_s21_arm, fpu_s22_arm, fpu_s23_arm, fpu_s24_arm, - fpu_s25_arm, fpu_s26_arm, fpu_s27_arm, fpu_s28_arm, fpu_s29_arm, - fpu_s30_arm, fpu_s31_arm, fpu_fpscr_arm, fpu_d0_arm, fpu_d1_arm, - fpu_d2_arm, fpu_d3_arm, fpu_d4_arm, fpu_d5_arm, fpu_d6_arm, - fpu_d7_arm, fpu_d8_arm, fpu_d9_arm, fpu_d10_arm, fpu_d11_arm, - fpu_d12_arm, fpu_d13_arm, fpu_d14_arm, fpu_d15_arm, fpu_d16_arm, - fpu_d17_arm, fpu_d18_arm, fpu_d19_arm, fpu_d20_arm, fpu_d21_arm, - fpu_d22_arm, fpu_d23_arm, fpu_d24_arm, fpu_d25_arm, fpu_d26_arm, - fpu_d27_arm, fpu_d28_arm, fpu_d29_arm, fpu_d30_arm, fpu_d31_arm, - fpu_q0_arm, fpu_q1_arm, fpu_q2_arm, fpu_q3_arm, fpu_q4_arm, - fpu_q5_arm, fpu_q6_arm, fpu_q7_arm, fpu_q8_arm, fpu_q9_arm, - fpu_q10_arm, fpu_q11_arm, fpu_q12_arm, fpu_q13_arm, fpu_q14_arm, - fpu_q15_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag - -}; -static_assert(((sizeof g_fpu_regnums_arm / sizeof g_fpu_regnums_arm[0]) - 1) == - k_num_fpr_registers_arm, - "g_fpu_regnums_arm has wrong number of register infos"); - -// Number of register sets provided by this context. -enum { k_num_register_sets = 2 }; - -// Register sets for arm. 
-static const lldb_private::RegisterSet g_reg_sets_arm[k_num_register_sets] = { - {"General Purpose Registers", "gpr", k_num_gpr_registers_arm, - g_gpr_regnums_arm}, - {"Floating Point Registers", "fpu", k_num_fpr_registers_arm, - g_fpu_regnums_arm}}; - bool RegisterContextPOSIX_arm::IsGPR(unsigned reg) { - return reg <= m_reg_info.last_gpr; // GPR's come first. + if (m_register_info_up->GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::GPRegSet) + return true; + return false; } bool RegisterContextPOSIX_arm::IsFPR(unsigned reg) { - return (m_reg_info.first_fpr <= reg && reg <= m_reg_info.last_fpr); + if (m_register_info_up->GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::FPRegSet) + return true; + return false; } RegisterContextPOSIX_arm::RegisterContextPOSIX_arm( - lldb_private::Thread &thread, uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info) - : lldb_private::RegisterContext(thread, concrete_frame_idx) { - m_register_info_up.reset(register_info); - - switch (register_info->m_target_arch.GetMachine()) { - case llvm::Triple::arm: - m_reg_info.num_registers = k_num_registers_arm; - m_reg_info.num_gpr_registers = k_num_gpr_registers_arm; - m_reg_info.num_fpr_registers = k_num_fpr_registers_arm; - m_reg_info.last_gpr = k_last_gpr_arm; - m_reg_info.first_fpr = k_first_fpr_arm; - m_reg_info.last_fpr = k_last_fpr_arm; - m_reg_info.first_fpr_v = fpu_s0_arm; - m_reg_info.last_fpr_v = fpu_s31_arm; - m_reg_info.gpr_flags = gpr_cpsr_arm; - break; - default: - assert(false && "Unhandled target architecture."); - break; - } - - ::memset(&m_fpr, 0, sizeof m_fpr); -} + lldb_private::Thread &thread, + std::unique_ptr register_info) + : lldb_private::RegisterContext(thread, 0), + m_register_info_up(std::move(register_info)) {} RegisterContextPOSIX_arm::~RegisterContextPOSIX_arm() {} @@ -115,19 +52,15 @@ void RegisterContextPOSIX_arm::Invalidate() {} void RegisterContextPOSIX_arm::InvalidateAllRegisters() {} unsigned 
RegisterContextPOSIX_arm::GetRegisterOffset(unsigned reg) { - assert(reg < m_reg_info.num_registers && "Invalid register number."); - return GetRegisterInfo()[reg].byte_offset; + return m_register_info_up->GetRegisterInfo()[reg].byte_offset; } unsigned RegisterContextPOSIX_arm::GetRegisterSize(unsigned reg) { - assert(reg < m_reg_info.num_registers && "Invalid register number."); - return GetRegisterInfo()[reg].byte_size; + return m_register_info_up->GetRegisterInfo()[reg].byte_size; } size_t RegisterContextPOSIX_arm::GetRegisterCount() { - size_t num_registers = - m_reg_info.num_gpr_registers + m_reg_info.num_fpr_registers; - return num_registers; + return m_register_info_up->GetRegisterCount(); } size_t RegisterContextPOSIX_arm::GetGPRSize() { @@ -143,41 +76,23 @@ const lldb_private::RegisterInfo *RegisterContextPOSIX_arm::GetRegisterInfo() { const lldb_private::RegisterInfo * RegisterContextPOSIX_arm::GetRegisterInfoAtIndex(size_t reg) { - if (reg < m_reg_info.num_registers) + if (reg < GetRegisterCount()) return &GetRegisterInfo()[reg]; - else - return nullptr; + + return nullptr; } size_t RegisterContextPOSIX_arm::GetRegisterSetCount() { - size_t sets = 0; - for (size_t set = 0; set < k_num_register_sets; ++set) { - if (IsRegisterSetAvailable(set)) - ++sets; - } - - return sets; + return m_register_info_up->GetRegisterSetCount(); } const lldb_private::RegisterSet * RegisterContextPOSIX_arm::GetRegisterSet(size_t set) { - if (IsRegisterSetAvailable(set)) { - switch (m_register_info_up->m_target_arch.GetMachine()) { - case llvm::Triple::arm: - return &g_reg_sets_arm[set]; - default: - assert(false && "Unhandled target architecture."); - return nullptr; - } - } - return nullptr; + return m_register_info_up->GetRegisterSet(set); } const char *RegisterContextPOSIX_arm::GetRegisterName(unsigned reg) { - assert(reg < m_reg_info.num_registers && "Invalid register offset."); - return GetRegisterInfo()[reg].name; -} - -bool 
RegisterContextPOSIX_arm::IsRegisterSetAvailable(size_t set_index) { - return set_index < k_num_register_sets; + if (reg < GetRegisterCount()) + return GetRegisterInfo()[reg].name; + return nullptr; } diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h index d6967f05ed487..6e7d47d5e50a6 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h @@ -10,7 +10,7 @@ #define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTPOSIX_ARM_H #include "RegisterInfoInterface.h" -#include "lldb-arm-register-enums.h" +#include "RegisterInfoPOSIX_arm.h" #include "lldb/Target/RegisterContext.h" #include "lldb/Utility/Log.h" @@ -18,9 +18,9 @@ class ProcessMonitor; class RegisterContextPOSIX_arm : public lldb_private::RegisterContext { public: - RegisterContextPOSIX_arm(lldb_private::Thread &thread, - uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info); + RegisterContextPOSIX_arm( + lldb_private::Thread &thread, + std::unique_ptr register_info); ~RegisterContextPOSIX_arm() override; @@ -45,46 +45,7 @@ class RegisterContextPOSIX_arm : public lldb_private::RegisterContext { const char *GetRegisterName(unsigned reg); protected: - struct RegInfo { - uint32_t num_registers; - uint32_t num_gpr_registers; - uint32_t num_fpr_registers; - - uint32_t last_gpr; - uint32_t first_fpr; - uint32_t last_fpr; - - uint32_t first_fpr_v; - uint32_t last_fpr_v; - - uint32_t gpr_flags; - }; - - struct QReg { - uint8_t bytes[16]; - }; - - struct FPU { - union { - uint32_t s[32]; - uint64_t d[32]; - QReg q[16]; // the 128-bit NEON registers - } floats; - uint32_t fpscr; - }; - - uint32_t m_gpr_arm[lldb_private::k_num_gpr_registers_arm]; // 32-bit general - // purpose - // registers. 
- RegInfo m_reg_info; - struct RegisterContextPOSIX_arm::FPU - m_fpr; // floating-point registers including extended register sets. - std::unique_ptr - m_register_info_up; // Register Info Interface (FreeBSD or Linux) - - // Determines if an extended register set is supported on the processor - // running the inferior process. - virtual bool IsRegisterSetAvailable(size_t set_index); + std::unique_ptr m_register_info_up; virtual const lldb_private::RegisterInfo *GetRegisterInfo(); @@ -92,6 +53,8 @@ class RegisterContextPOSIX_arm : public lldb_private::RegisterContext { bool IsFPR(unsigned reg); + size_t GetFPUSize() { return sizeof(RegisterInfoPOSIX_arm::FPU); } + virtual bool ReadGPR() = 0; virtual bool ReadFPR() = 0; virtual bool WriteGPR() = 0; diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp index 8fc4d5282b06a..17b96f944cda2 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp @@ -71,9 +71,87 @@ GetRegisterInfoCount(const lldb_private::ArchSpec &target_arch) { } } +// Number of register sets provided by this context. +enum { + k_num_gpr_registers = gpr_cpsr - gpr_r0 + 1, + k_num_fpr_registers = fpu_q15 - fpu_s0 + 1, + k_num_register_sets = 2 +}; + +// arm general purpose registers. +static const uint32_t g_gpr_regnums_arm[] = { + gpr_r0, gpr_r1, + gpr_r2, gpr_r3, + gpr_r4, gpr_r5, + gpr_r6, gpr_r7, + gpr_r8, gpr_r9, + gpr_r10, gpr_r11, + gpr_r12, gpr_sp, + gpr_lr, gpr_pc, + gpr_cpsr, LLDB_INVALID_REGNUM // register sets need to end with this flag +}; +static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == + k_num_gpr_registers, + "g_gpr_regnums_arm has wrong number of register infos"); + +// arm floating point registers. 
+static const uint32_t g_fpu_regnums_arm[] = { + fpu_s0, fpu_s1, + fpu_s2, fpu_s3, + fpu_s4, fpu_s5, + fpu_s6, fpu_s7, + fpu_s8, fpu_s9, + fpu_s10, fpu_s11, + fpu_s12, fpu_s13, + fpu_s14, fpu_s15, + fpu_s16, fpu_s17, + fpu_s18, fpu_s19, + fpu_s20, fpu_s21, + fpu_s22, fpu_s23, + fpu_s24, fpu_s25, + fpu_s26, fpu_s27, + fpu_s28, fpu_s29, + fpu_s30, fpu_s31, + fpu_fpscr, fpu_d0, + fpu_d1, fpu_d2, + fpu_d3, fpu_d4, + fpu_d5, fpu_d6, + fpu_d7, fpu_d8, + fpu_d9, fpu_d10, + fpu_d11, fpu_d12, + fpu_d13, fpu_d14, + fpu_d15, fpu_d16, + fpu_d17, fpu_d18, + fpu_d19, fpu_d20, + fpu_d21, fpu_d22, + fpu_d23, fpu_d24, + fpu_d25, fpu_d26, + fpu_d27, fpu_d28, + fpu_d29, fpu_d30, + fpu_d31, fpu_q0, + fpu_q1, fpu_q2, + fpu_q3, fpu_q4, + fpu_q5, fpu_q6, + fpu_q7, fpu_q8, + fpu_q9, fpu_q10, + fpu_q11, fpu_q12, + fpu_q13, fpu_q14, + fpu_q15, LLDB_INVALID_REGNUM // register sets need to end with this flag +}; +static_assert(((sizeof g_fpu_regnums_arm / sizeof g_fpu_regnums_arm[0]) - 1) == + k_num_fpr_registers, + "g_fpu_regnums_arm has wrong number of register infos"); + +// Register sets for arm. 
+static const RegisterSet g_reg_sets_arm[k_num_register_sets] = { + {"General Purpose Registers", "gpr", k_num_gpr_registers, + g_gpr_regnums_arm}, + {"Floating Point Registers", "fpu", k_num_fpr_registers, + g_fpu_regnums_arm}}; + RegisterInfoPOSIX_arm::RegisterInfoPOSIX_arm( const lldb_private::ArchSpec &target_arch) - : lldb_private::RegisterInfoInterface(target_arch), + : lldb_private::RegisterInfoAndSetInterface(target_arch), m_register_info_p(GetRegisterInfoPtr(target_arch)), m_register_info_count(GetRegisterInfoCount(target_arch)) {} @@ -81,11 +159,35 @@ size_t RegisterInfoPOSIX_arm::GetGPRSize() const { return sizeof(struct RegisterInfoPOSIX_arm::GPR); } +size_t RegisterInfoPOSIX_arm::GetFPRSize() const { + return sizeof(struct RegisterInfoPOSIX_arm::FPU); +} + const lldb_private::RegisterInfo * RegisterInfoPOSIX_arm::GetRegisterInfo() const { return m_register_info_p; } +size_t RegisterInfoPOSIX_arm::GetRegisterSetCount() const { + return k_num_register_sets; +} + +size_t RegisterInfoPOSIX_arm::GetRegisterSetFromRegisterIndex( + uint32_t reg_index) const { + if (reg_index <= gpr_cpsr) + return GPRegSet; + if (reg_index <= fpu_q15) + return FPRegSet; + return LLDB_INVALID_REGNUM; +} + +const lldb_private::RegisterSet * +RegisterInfoPOSIX_arm::GetRegisterSet(size_t set_index) const { + if (set_index < GetRegisterSetCount()) + return &g_reg_sets_arm[set_index]; + return nullptr; +} + uint32_t RegisterInfoPOSIX_arm::GetRegisterCount() const { return m_register_info_count; } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h index 1cf896e3decfd..db155d757ca8c 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h @@ -9,12 +9,14 @@ #ifndef LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERINFOPOSIX_ARM_H #define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERINFOPOSIX_ARM_H -#include 
"RegisterInfoInterface.h" +#include "RegisterInfoAndSetInterface.h" #include "lldb/Target/RegisterContext.h" #include "lldb/lldb-private.h" -class RegisterInfoPOSIX_arm : public lldb_private::RegisterInfoInterface { +class RegisterInfoPOSIX_arm : public lldb_private::RegisterInfoAndSetInterface { public: + enum { GPRegSet = 0, FPRegSet}; + struct GPR { uint32_t r[16]; // R0-R15 uint32_t cpsr; // CPSR @@ -49,10 +51,19 @@ class RegisterInfoPOSIX_arm : public lldb_private::RegisterInfoInterface { size_t GetGPRSize() const override; + size_t GetFPRSize() const override; + const lldb_private::RegisterInfo *GetRegisterInfo() const override; uint32_t GetRegisterCount() const override; + const lldb_private::RegisterSet * + GetRegisterSet(size_t reg_set) const override; + + size_t GetRegisterSetCount() const override; + + size_t GetRegisterSetFromRegisterIndex(uint32_t reg_index) const override; + private: const lldb_private::RegisterInfo *m_register_info_p; uint32_t m_register_info_count; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp index b76f26a584c04..2f71f175a00d9 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp @@ -16,9 +16,9 @@ using namespace lldb_private; RegisterContextCorePOSIX_arm::RegisterContextCorePOSIX_arm( - Thread &thread, RegisterInfoInterface *register_info, + Thread &thread, std::unique_ptr register_info, const DataExtractor &gpregset, llvm::ArrayRef notes) - : RegisterContextPOSIX_arm(thread, 0, register_info) { + : RegisterContextPOSIX_arm(thread, std::move(register_info)) { m_gpr_buffer = std::make_shared(gpregset.GetDataStart(), gpregset.GetByteSize()); m_gpr.SetData(m_gpr_buffer); diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h 
index f9ec08ed35fcf..de343f9001e06 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h @@ -18,7 +18,7 @@ class RegisterContextCorePOSIX_arm : public RegisterContextPOSIX_arm { public: RegisterContextCorePOSIX_arm( lldb_private::Thread &thread, - lldb_private::RegisterInfoInterface *register_info, + std::unique_ptr register_info, const lldb_private::DataExtractor &gpregset, llvm::ArrayRef notes); diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp index 6b5acfa4bc1bb..76c0c2843e6df 100644 --- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp @@ -82,9 +82,7 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { case llvm::Triple::FreeBSD: { switch (arch.GetMachine()) { case llvm::Triple::aarch64: - break; case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(arch); break; case llvm::Triple::ppc: reg_interface = new RegisterContextFreeBSD_powerpc32(arch); @@ -122,9 +120,6 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { case llvm::Triple::Linux: { switch (arch.GetMachine()) { - case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(arch); - break; case llvm::Triple::aarch64: break; case llvm::Triple::mipsel: @@ -157,9 +152,6 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { switch (arch.GetMachine()) { case llvm::Triple::aarch64: break; - case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(arch); - break; case llvm::Triple::x86: reg_interface = new RegisterContextOpenBSD_i386(arch); break; @@ -176,7 +168,8 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { break; } - if (!reg_interface && arch.GetMachine() != llvm::Triple::aarch64) { + if (!reg_interface && arch.GetMachine() != llvm::Triple::aarch64 && + 
arch.GetMachine() != llvm::Triple::arm) { LLDB_LOGF(log, "elf-core::%s:: Architecture(%d) or OS(%d) not supported", __FUNCTION__, arch.GetMachine(), arch.GetTriple().getOS()); assert(false && "Architecture or OS not supported"); @@ -190,7 +183,8 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { break; case llvm::Triple::arm: m_thread_reg_ctx_sp = std::make_shared( - *this, reg_interface, m_gpregset_data, m_notes); + *this, std::make_unique(arch), m_gpregset_data, + m_notes); break; case llvm::Triple::mipsel: case llvm::Triple::mips: diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 0949b99185234..dd0f69841aa70 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -2812,7 +2812,7 @@ lldb::addr_t GDBRemoteCommunicationClient::GetShlibInfoAddr() { } lldb_private::Status GDBRemoteCommunicationClient::RunShellCommand( - const char *command, // Shouldn't be NULL + llvm::StringRef command, const FileSpec & working_dir, // Pass empty FileSpec to use the current working directory int *status_ptr, // Pass NULL if you don't want the process exit status @@ -2823,7 +2823,7 @@ lldb_private::Status GDBRemoteCommunicationClient::RunShellCommand( const Timeout &timeout) { lldb_private::StreamString stream; stream.PutCString("qPlatform_shell:"); - stream.PutBytesAsRawHex8(command, strlen(command)); + stream.PutBytesAsRawHex8(command.data(), command.size()); stream.PutChar(','); uint32_t timeout_sec = UINT32_MAX; if (timeout) { diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 0159125a433b8..61acfad5d3136 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ 
b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -399,7 +399,7 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { bool GetFileExists(const FileSpec &file_spec); Status RunShellCommand( - const char *command, // Shouldn't be nullptr + llvm::StringRef command, const FileSpec &working_dir, // Pass empty FileSpec to use the current // working directory int *status_ptr, // Pass nullptr if you don't want the process exit status diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index eeec7296747e2..07e5b284eab81 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -326,7 +326,8 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) { } const SectionList &list = *module.GetSectionList(); - llvm::DenseMap symbols; + llvm::DenseSet found_symbol_addresses; + std::vector symbols; auto add_symbol = [&](addr_t address, llvm::Optional size, llvm::StringRef name) { address += base; @@ -338,8 +339,12 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) { name, address); return; } - symbols.try_emplace( - address, /*symID*/ 0, Mangled(name), eSymbolTypeCode, + // Keep track of what addresses were already added so far and only add + // the symbol with the first address. + if (!found_symbol_addresses.insert(address).second) + return; + symbols.emplace_back( + /*symID*/ 0, Mangled(name), eSymbolTypeCode, /*is_global*/ true, /*is_debug*/ false, /*is_trampoline*/ false, /*is_artificial*/ false, AddressRange(section_sp, address - section_sp->GetFileAddress(), @@ -359,8 +364,8 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) { LLDB_LOG(log, "Failed to parse: {0}. 
Skipping record.", line); } - for (auto &KV : symbols) - symtab.AddSymbol(std::move(KV.second)); + for (Symbol &symbol : symbols) + symtab.AddSymbol(std::move(symbol)); symtab.CalculateSymbolSizes(); } diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index e867b8db47232..7416ea6dd40c1 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -1319,7 +1319,23 @@ MmapArgList Platform::GetMmapArgumentList(const ArchSpec &arch, addr_t addr, } lldb_private::Status Platform::RunShellCommand( - const char *command, // Shouldn't be nullptr + llvm::StringRef command, + const FileSpec & + working_dir, // Pass empty FileSpec to use the current working directory + int *status_ptr, // Pass nullptr if you don't want the process exit status + int *signo_ptr, // Pass nullptr if you don't want the signal that caused the + // process to exit + std::string + *command_output, // Pass nullptr if you don't want the command output + const Timeout &timeout) { + return RunShellCommand(llvm::StringRef(), command, working_dir, status_ptr, + signo_ptr, command_output, timeout); +} + +lldb_private::Status Platform::RunShellCommand( + llvm::StringRef shell, // Pass empty if you want to use the default + // shell interpreter + llvm::StringRef command, // Shouldn't be empty const FileSpec & working_dir, // Pass empty FileSpec to use the current working directory int *status_ptr, // Pass nullptr if you don't want the process exit status @@ -1329,8 +1345,8 @@ lldb_private::Status Platform::RunShellCommand( *command_output, // Pass nullptr if you don't want the command output const Timeout &timeout) { if (IsHost()) - return Host::RunShellCommand(command, working_dir, status_ptr, signo_ptr, - command_output, timeout); + return Host::RunShellCommand(shell, command, working_dir, status_ptr, + signo_ptr, command_output, timeout); else return Status("unimplemented"); } diff --git a/lldb/source/Target/RemoteAwarePlatform.cpp 
b/lldb/source/Target/RemoteAwarePlatform.cpp index f53158b06b8f9..3a186adca04c7 100644 --- a/lldb/source/Target/RemoteAwarePlatform.cpp +++ b/lldb/source/Target/RemoteAwarePlatform.cpp @@ -171,15 +171,24 @@ Status RemoteAwarePlatform::ResolveExecutable( } Status RemoteAwarePlatform::RunShellCommand( - const char *command, const FileSpec &working_dir, int *status_ptr, + llvm::StringRef command, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout) { + return RunShellCommand(llvm::StringRef(), command, working_dir, status_ptr, + signo_ptr, command_output, timeout); +} + +Status RemoteAwarePlatform::RunShellCommand( + llvm::StringRef shell, llvm::StringRef command, const FileSpec &working_dir, + int *status_ptr, int *signo_ptr, std::string *command_output, + const Timeout &timeout) { if (IsHost()) - return Host::RunShellCommand(command, working_dir, status_ptr, signo_ptr, - command_output, timeout); + return Host::RunShellCommand(shell, command, working_dir, status_ptr, + signo_ptr, command_output, timeout); if (m_remote_platform_sp) - return m_remote_platform_sp->RunShellCommand( - command, working_dir, status_ptr, signo_ptr, command_output, timeout); + return m_remote_platform_sp->RunShellCommand(shell, command, working_dir, + status_ptr, signo_ptr, + command_output, timeout); return Status("unable to run a remote command without a platform"); } diff --git a/lldb/source/Utility/Reproducer.cpp b/lldb/source/Utility/Reproducer.cpp index 68c64195f55ee..1f9ab8d37174e 100644 --- a/lldb/source/Utility/Reproducer.cpp +++ b/lldb/source/Utility/Reproducer.cpp @@ -268,3 +268,94 @@ bool Loader::HasFile(StringRef file) { auto it = std::lower_bound(m_files.begin(), m_files.end(), file.str()); return (it != m_files.end()) && (*it == file); } + +void Verifier::Verify( + llvm::function_ref error_callback, + llvm::function_ref warning_callback, + llvm::function_ref note_callack) const { + if (!m_loader) { + 
error_callback("invalid loader"); + return; + } + + FileSpec vfs_mapping = m_loader->GetFile(); + ErrorOr> buffer = + vfs::getRealFileSystem()->getBufferForFile(vfs_mapping.GetPath()); + if (!buffer) { + error_callback("unable to read files: " + buffer.getError().message()); + return; + } + + IntrusiveRefCntPtr vfs = vfs::getVFSFromYAML( + std::move(buffer.get()), nullptr, vfs_mapping.GetPath()); + if (!vfs) { + error_callback("unable to initialize the virtual file system"); + return; + } + + auto &redirecting_vfs = static_cast(*vfs); + redirecting_vfs.setFallthrough(false); + + { + llvm::Expected working_dir = + GetDirectoryFrom(m_loader); + if (working_dir) { + if (!vfs->exists(*working_dir)) + warning_callback("working directory '" + *working_dir + "' not in VFS"); + vfs->setCurrentWorkingDirectory(*working_dir); + } else { + warning_callback("no working directory in reproducer: " + + toString(working_dir.takeError())); + } + } + + { + llvm::Expected home_dir = + GetDirectoryFrom(m_loader); + if (home_dir) { + if (!vfs->exists(*home_dir)) + warning_callback("home directory '" + *home_dir + "' not in VFS"); + } else { + warning_callback("no home directory in reproducer: " + + toString(home_dir.takeError())); + } + } + + { + Expected symbol_files = + m_loader->LoadBuffer(); + if (symbol_files) { + std::vector entries; + llvm::yaml::Input yin(*symbol_files); + yin >> entries; + for (const auto &entry : entries) { + if (!entry.module_path.empty() && !vfs->exists(entry.module_path)) { + warning_callback("'" + entry.module_path + "': module path for " + + entry.uuid + " not in VFS"); + } + if (!entry.symbol_path.empty() && !vfs->exists(entry.symbol_path)) { + warning_callback("'" + entry.symbol_path + "': symbol path for " + + entry.uuid + " not in VFS"); + } + } + } else { + llvm::consumeError(symbol_files.takeError()); + } + } + + // Missing files in the VFS are notes rather than warnings. 
Because the VFS + // is a snapshot, temporary files could have been removed between when they + // were recorded and when the reproducer was generated. + std::vector roots = redirecting_vfs.getRoots(); + for (llvm::StringRef root : roots) { + std::error_code ec; + vfs::recursive_directory_iterator iter(*vfs, root, ec); + vfs::recursive_directory_iterator end; + for (; iter != end && !ec; iter.increment(ec)) { + ErrorOr status = vfs->status(iter->path()); + if (!status) + note_callack("'" + iter->path().str() + + "': " + status.getError().message()); + } + } +} diff --git a/lldb/source/Utility/ReproducerProvider.cpp b/lldb/source/Utility/ReproducerProvider.cpp index f5556659390bf..d67c886708a2f 100644 --- a/lldb/source/Utility/ReproducerProvider.cpp +++ b/lldb/source/Utility/ReproducerProvider.cpp @@ -9,6 +9,7 @@ #include "lldb/Utility/ReproducerProvider.h" #include "lldb/Utility/ProcessInfo.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" using namespace lldb_private; diff --git a/lldb/test/API/assert_messages_test/TestAssertMessages.py b/lldb/test/API/assert_messages_test/TestAssertMessages.py new file mode 100644 index 0000000000000..6619a65ad69ea --- /dev/null +++ b/lldb/test/API/assert_messages_test/TestAssertMessages.py @@ -0,0 +1,115 @@ +""" +Test the format of API test suite assert failure messages +""" + + +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * +from textwrap import dedent + + +class AssertMessagesTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True + + def assert_expect_fails_with(self, cmd, expect_args, expected_msg): + try: + # This expect should fail + self.expect(cmd, **expect_args) + except AssertionError as e: + # Then check message from previous expect + self.expect(str(e), exe=False, substrs=[dedent(expected_msg)]) + else: + self.fail("Initial expect should have raised 
AssertionError!") + + def test_expect(self): + """Test format of messages produced by expect(...)""" + + # When an expect passes the messages are sent to the trace + # file which we can't access here. So really, these only + # check what failures look like, but it *should* be the same + # content for the trace log too. + + # Will stop at startstr fail + self.assert_expect_fails_with("settings list prompt", + dict(startstr="dog", endstr="cat"), + """\ + Ran command: + "settings list prompt" + + Got output: + prompt -- The debugger command line prompt displayed for the user. + + Expecting start string: "dog" (was not found)""") + + # startstr passes, endstr fails + # We see both reported + self.assert_expect_fails_with("settings list prompt", + dict(startstr=" prompt -- ", endstr="foo"), + """\ + Ran command: + "settings list prompt" + + Got output: + prompt -- The debugger command line prompt displayed for the user. + + Expecting start string: " prompt -- " (was found) + Expecting end string: "foo" (was not found)""") + + # Same thing for substrs, regex patterns ignored because of substr failure + # Any substr after the first missing is also ignored + self.assert_expect_fails_with("abcdefg", + dict(substrs=["abc", "ijk", "xyz"], + patterns=["foo", "bar"], exe=False), + """\ + Checking string: + "abcdefg" + + Expecting sub string: "abc" (was found) + Expecting sub string: "ijk" (was not found)""") + + # Regex patterns also stop at first failure, subsequent patterns ignored + # They are last in the chain so no other check gets skipped + # Including the rest of the conditions here to prove they are run and shown + self.assert_expect_fails_with("0123456789", + dict(startstr="012", endstr="789", substrs=["345", "678"], + patterns=["[0-9]+", "[a-f]+", "a|b|c"], exe=False), + """\ + Checking string: + "0123456789" + + Expecting start string: "012" (was found) + Expecting end string: "789" (was found) + Expecting sub string: "345" (was found) + Expecting sub string: "678" 
(was found) + Expecting regex pattern: "[0-9]+" (was found, matched "0123456789") + Expecting regex pattern: "[a-f]+" (was not found)""") + + # This time we dont' want matches but we do get them + self.assert_expect_fails_with("the quick brown fox", + # Note that the second pattern *will* match + dict(patterns=["[0-9]+", "fox"], exe=False, matching=False, + startstr="cat", endstr="rabbit", substrs=["abc", "def"]), + """\ + Checking string: + "the quick brown fox" + + Not expecting start string: "cat" (was not found) + Not expecting end string: "rabbit" (was not found) + Not expecting sub string: "abc" (was not found) + Not expecting sub string: "def" (was not found) + Not expecting regex pattern: "[0-9]+" (was not found) + Not expecting regex pattern: "fox" (was found, matched "fox")""") + + # Extra assert messages are only printed when we get a failure + # So I can't test that from here, just how it looks when it's printed + self.assert_expect_fails_with("mouse", + dict(startstr="cat", exe=False, msg="Reason for check goes here!"), + """\ + Checking string: + "mouse" + + Expecting start string: "cat" (was not found) + Reason for check goes here!""") diff --git a/lldb/test/API/commands/platform/basic/Makefile b/lldb/test/API/commands/platform/basic/Makefile new file mode 100644 index 0000000000000..3626466f607c1 --- /dev/null +++ b/lldb/test/API/commands/platform/basic/Makefile @@ -0,0 +1,5 @@ +C_SOURCES := myshell.c +CFLAGS_EXTRAS := -g0 # No debug info. 
+MAKE_DSYM := NO + +include Makefile.rules diff --git a/lldb/test/API/commands/platform/basic/TestPlatformCommand.py b/lldb/test/API/commands/platform/basic/TestPlatformCommand.py index 570f9b3f828db..dc1701258246a 100644 --- a/lldb/test/API/commands/platform/basic/TestPlatformCommand.py +++ b/lldb/test/API/commands/platform/basic/TestPlatformCommand.py @@ -13,6 +13,7 @@ class PlatformCommandTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True @no_debug_info_test def test_help_platform(self): @@ -92,3 +93,11 @@ def test_shell_timeout(self): "error: timed out waiting for shell command to complete"]) self.expect("shell -t 1 -- sleep 3", error=True, substrs=[ "error: timed out waiting for shell command to complete"]) + + @no_debug_info_test + def test_host_shell_interpreter(self): + """ Test the host platform shell with a different interpreter """ + self.build() + exe = self.getBuildArtifact('a.out') + self.expect("platform shell -h -s " + exe + " -- 'echo $0'", + substrs=['SUCCESS', 'a.out']) diff --git a/lldb/test/API/commands/platform/basic/TestPlatformPython.py b/lldb/test/API/commands/platform/basic/TestPlatformPython.py index ab10d30b6ff57..0063621e58007 100644 --- a/lldb/test/API/commands/platform/basic/TestPlatformPython.py +++ b/lldb/test/API/commands/platform/basic/TestPlatformPython.py @@ -79,3 +79,20 @@ def test_available_platform_list(self): self.assertEqual( desc_data.GetType(), lldb.eStructuredDataTypeString, 'Platform description is a string') + + @add_test_categories(['pyapi']) + @no_debug_info_test + def test_shell_interpreter(self): + """ Test a shell with a custom interpreter """ + platform = self.dbg.GetSelectedPlatform() + self.assertTrue(platform.IsValid()) + + sh_cmd = lldb.SBPlatformShellCommand('/bin/zsh', 'echo $0') + self.assertIn('/bin/zsh', sh_cmd.GetShell()) + self.assertIn('echo $0', sh_cmd.GetCommand()) + + self.build() + sh_cmd.SetShell(self.getBuildArtifact('a.out')) + err = 
platform.Run(sh_cmd) + self.assertTrue(err.Success()) + self.assertIn("SUCCESS", sh_cmd.GetOutput()) diff --git a/lldb/test/API/commands/platform/basic/myshell.c b/lldb/test/API/commands/platform/basic/myshell.c new file mode 100644 index 0000000000000..8fef648de651c --- /dev/null +++ b/lldb/test/API/commands/platform/basic/myshell.c @@ -0,0 +1,24 @@ +#include +#include +#include + +int main(int argc, char *argv[]) { + if (argc < 3) { + fprintf(stderr, "ERROR: Too few arguments (count: %d).\n", argc - 1); + exit(1); + } + +#if defined(_WIN32) || defined(_WIN64) + char *cmd_opt = "/C"; +#else + char *cmd_opt = "-c"; +#endif + + if (strncmp(argv[1], cmd_opt, 2)) { + fprintf(stderr, "ERROR: Missing shell command option ('%s').\n", cmd_opt); + exit(1); + } + + printf("SUCCESS: %s\n", argv[0]); + return 0; +} diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index 42d30f6cb1137..b243a6692d852 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -10,29 +10,6 @@ class RegisterCommandsTestCase(TestBase): - def targetHasSVE(self): - triple = self.dbg.GetSelectedPlatform().GetTriple() - - # TODO other platforms, please implement this function - if not re.match(".*-.*-linux", triple): - return False - - # Need to do something different for non-Linux/Android targets - cpuinfo_path = self.getBuildArtifact("cpuinfo") - if configuration.lldb_platform_name: - self.runCmd('platform get-file "/proc/cpuinfo" ' + cpuinfo_path) - else: - cpuinfo_path = "/proc/cpuinfo" - - try: - f = open(cpuinfo_path, 'r') - cpuinfo = f.read() - f.close() - except: - return False - - return " sve " in cpuinfo - def check_sve_register_size(self, 
set, name, expected): reg_value = set.GetChildMemberWithName(name) self.assertTrue(reg_value.IsValid(), @@ -53,7 +30,7 @@ def test_sve_registers_configuration(self): exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.targetHasSVE(): + if not self.isAArch64SVE(): self.skipTest('SVE registers must be supported.') lldbutil.run_break_set_by_file_and_line( @@ -108,7 +85,7 @@ def test_sve_registers_read_write(self): exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.targetHasSVE(): + if not self.isAArch64SVE(): self.skipTest('SVE registers must be supported.') lldbutil.run_break_set_by_file_and_line( diff --git a/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py b/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py index e949f1a1a07e9..1653fe36af7b0 100644 --- a/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py +++ b/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py @@ -6,6 +6,7 @@ import lldb +from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * import lldbsuite.test.lldbutil as lldbutil @@ -20,6 +21,7 @@ def setUp(self): # Find the line number to break at. 
self.line = line_number('main.cpp', '// Set break point at this line.') + @no_debug_info_test def test_with_run_command(self): """Test that deeply nested ValueObjects still work.""" self.build() diff --git a/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py b/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py index c58f700039eb3..d40eee0cb1b0e 100644 --- a/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py +++ b/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py @@ -1,7 +1,5 @@ """Look up enum type information and check for correct display.""" - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -12,145 +10,45 @@ class CPP11EnumTypesTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int8_t(self): - """Test C++11 enumeration class types as int8_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DSIGNED_ENUM_CLASS_TYPE=int8_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int16_t(self): - """Test C++11 enumeration class types as int16_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DSIGNED_ENUM_CLASS_TYPE=int16_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int32_t(self): - """Test C++11 enumeration class types as int32_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DSIGNED_ENUM_CLASS_TYPE=int32_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int64_t(self): - """Test C++11 enumeration class types as int64_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': 
'"-DSIGNED_ENUM_CLASS_TYPE=int64_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint8_t(self): - """Test C++11 enumeration class types as uint8_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint8_t"'}) - self.image_lookup_for_enum_type(False) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint16_t(self): - """Test C++11 enumeration class types as uint16_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint16_t"'}) - self.image_lookup_for_enum_type(False) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint32_t(self): - """Test C++11 enumeration class types as uint32_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint32_t"'}) - self.image_lookup_for_enum_type(False) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint64_t(self): - """Test C++11 enumeration class types as uint64_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint64_t"'}) - self.image_lookup_for_enum_type(False) - - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break inside main(). - self.line = line_number('main.cpp', '// Set break point at this line.') - - def image_lookup_for_enum_type(self, is_signed): - """Test C++11 enumeration class types.""" - exe = self.getBuildArtifact("a.out") - self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - - # Break inside the main. 
- bkpt_id = lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=1, loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - # The breakpoint should have a hit count of 1. - self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE, - substrs=[' resolved, hit count = 1']) - - # Look up information about the 'DayType' enum type. - # Check for correct display. - self.expect("image lookup -t DayType", DATA_TYPES_DISPLAYED_CORRECTLY, - patterns=['enum( struct| class) DayType {'], - substrs=['Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'kNumDays', - '}']) - - if is_signed: - enum_values = ['-4', - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'kNumDays', - '5'] + def check_enum(self, suffix): + """ + :param suffix The suffix of the enum type name (enum_) that + should be checked. + :param test_values A list of integet values that shouldn't be converted + to any valid enum case. + """ + enum_name = "enum_" + suffix + unsigned = suffix.startswith("u") + + self.expect("image lookup -t " + enum_name, + patterns=["enum( struct| class) " + enum_name + " {"], + substrs=["Case1", + "Case2", + "Case3"]) + # Test each case in the enum. 
+ self.expect_expr("var1_" + suffix, result_type=enum_name, result_value="Case1") + self.expect_expr("var2_" + suffix, result_type=enum_name, result_value="Case2") + self.expect_expr("var3_" + suffix, result_type=enum_name, result_value="Case3") + + if unsigned: + self.expect_expr("var_below_" + suffix, result_type=enum_name, result_value="199") + self.expect_expr("var_above_" + suffix, result_type=enum_name, result_value="203") else: - enum_values = ['199', - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'kNumDays', - '208'] - - bkpt = self.target().FindBreakpointByID(bkpt_id) - for enum_value in enum_values: - self.expect( - "frame variable day", - 'check for valid enumeration value', - substrs=[enum_value]) - lldbutil.continue_to_breakpoint(self.process(), bkpt) + self.expect_expr("var_below_" + suffix, result_type=enum_name, result_value="-3") + self.expect_expr("var_above_" + suffix, result_type=enum_name, result_value="1") + + @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') + @skipIf(dwarf_version=['<', '4']) + def test(self): + self.build() + target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) + self.check_enum("uc") + self.check_enum("c") + self.check_enum("us") + self.check_enum("s") + self.check_enum("ui") + self.check_enum("i") + self.check_enum("ul") + self.check_enum("l") + self.check_enum("ull") + self.check_enum("ll") diff --git a/lldb/test/API/lang/cpp/enum_types/main.cpp b/lldb/test/API/lang/cpp/enum_types/main.cpp index d7d428a24432b..be895208c7d40 100644 --- a/lldb/test/API/lang/cpp/enum_types/main.cpp +++ b/lldb/test/API/lang/cpp/enum_types/main.cpp @@ -1,41 +1,28 @@ -#include -#include +#define DEFINE_UNSIGNED_ENUM(suffix, enum_type) \ + enum class enum_##suffix : enum_type{Case1 = 200, Case2, Case3}; \ + enum_##suffix var1_##suffix = enum_##suffix ::Case1; \ + enum_##suffix var2_##suffix = enum_##suffix ::Case2; \ + enum_##suffix var3_##suffix = enum_##suffix 
::Case3; \ + enum_##suffix var_below_##suffix = static_cast(199); \ + enum_##suffix var_above_##suffix = static_cast(203); +#define DEFINE_SIGNED_ENUM(suffix, enum_type) \ + enum class enum_##suffix : enum_type{Case1 = -2, Case2, Case3}; \ + enum_##suffix var1_##suffix = enum_##suffix ::Case1; \ + enum_##suffix var2_##suffix = enum_##suffix ::Case2; \ + enum_##suffix var3_##suffix = enum_##suffix ::Case3; \ + enum_##suffix var_below_##suffix = static_cast(-3); \ + enum_##suffix var_above_##suffix = static_cast(1); -int main (int argc, char const *argv[]) -{ -#ifdef SIGNED_ENUM_CLASS_TYPE - typedef SIGNED_ENUM_CLASS_TYPE enum_integer_t; - enum class DayType : enum_integer_t { - Monday = -3, - Tuesday, - Wednesday, - Thursday, - Friday, - Saturday, - Sunday, - kNumDays - }; - enum_integer_t day_value; -#else - typedef UNSIGNED_ENUM_CLASS_TYPE enum_integer_t; - enum class DayType : enum_integer_t { - Monday = 200, - Tuesday, - Wednesday, - Thursday, - Friday, - Saturday, - Sunday, - kNumDays - }; - enum_integer_t day_value; -#endif +DEFINE_UNSIGNED_ENUM(uc, unsigned char) +DEFINE_SIGNED_ENUM(c, signed char) +DEFINE_UNSIGNED_ENUM(us, unsigned short int) +DEFINE_SIGNED_ENUM(s, signed short int) +DEFINE_UNSIGNED_ENUM(ui, unsigned int) +DEFINE_SIGNED_ENUM(i, signed int) +DEFINE_UNSIGNED_ENUM(ul, unsigned long) +DEFINE_SIGNED_ENUM(l, signed long) +DEFINE_UNSIGNED_ENUM(ull, unsigned long long) +DEFINE_SIGNED_ENUM(ll, signed long long) - for (day_value = (enum_integer_t)DayType::Monday - 1; day_value <= (enum_integer_t)DayType::kNumDays + 1; ++day_value) - { - DayType day = (DayType)day_value; - printf("day as int is %i\n", (int)day); // Set break point at this line. 
- } - return 0; // Break here for char tests -} +int main(int argc, char const *argv[]) { return 0; } diff --git a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py index db70e4a8124b6..7fa5f7d45267f 100644 --- a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py +++ b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py @@ -38,7 +38,8 @@ def run_test(self, symbol_basename, expect_debug_info_size): def checkSymbolsLoadedWithSize(): active_modules = self.vscode.get_active_modules() program_module = active_modules[program_basename] - symbolsStatus = program_module['debugInfoSize'] + self.assertIn('symbolFilePath', program_module) + self.assertIn(symbols_path, program_module['symbolFilePath']) symbol_regex = re.compile(r"[0-9]+(\.[0-9]*)?[KMG]?B") return symbol_regex.match(program_module['symbolStatus']) @@ -48,8 +49,6 @@ def checkSymbolsLoadedWithSize(): program_module = active_modules[program_basename] self.assertEqual(program_basename, program_module['name']) self.assertEqual(program, program_module['path']) - self.assertIn('symbolFilePath', program_module) - self.assertIn(symbols_path, program_module['symbolFilePath']) self.assertIn('addressRange', program_module) @skipIfWindows diff --git a/lldb/test/Shell/Process/Inputs/abort.c b/lldb/test/Shell/Process/Inputs/abort.c new file mode 100644 index 0000000000000..9edc9336dc3e0 --- /dev/null +++ b/lldb/test/Shell/Process/Inputs/abort.c @@ -0,0 +1,3 @@ +#include + +int main(int argc, char **argv) { abort(); } diff --git a/lldb/test/Shell/Process/TestAbortExitCode.test b/lldb/test/Shell/Process/TestAbortExitCode.test new file mode 100644 index 0000000000000..5be0a15ab1728 --- /dev/null +++ b/lldb/test/Shell/Process/TestAbortExitCode.test @@ -0,0 +1,6 @@ +UNSUPPORTED: system-windows + +RUN: %clang_host %p/Inputs/abort.c -o %t +RUN: %lldb %t -o run -o continue | FileCheck %s + +CHECK: status = 6 (0x00000006) diff --git 
a/lldb/test/Shell/Reproducer/TestDebugSymbols.test b/lldb/test/Shell/Reproducer/TestDebugSymbols.test index 6a3cc1249cbd1..986452ec35e86 100644 --- a/lldb/test/Shell/Reproducer/TestDebugSymbols.test +++ b/lldb/test/Shell/Reproducer/TestDebugSymbols.test @@ -12,3 +12,7 @@ # DUMP: uuid: AD52358C-94F8-3796-ADD6-B20FFAC00E5C # DUMP-NEXT: module path: /path/to/unstripped/executable # DUMP-NEXT: symbol path: /path/to/foo.dSYM/Contents/Resources/DWARF/foo + +# RUN: not %lldb -b -o 'reproducer verify -f %t.repro' 2>&1 | FileCheck %s --check-prefix VERIFY +# VERIFY: warning: '/path/to/unstripped/executable': module path for AD52358C-94F8-3796-ADD6-B20FFAC00E5C not in VFS +# VERIFY: warning: '/path/to/foo.dSYM/Contents/Resources/DWARF/foo': symbol path for AD52358C-94F8-3796-ADD6-B20FFAC00E5C not in VFS diff --git a/lldb/test/Shell/Reproducer/TestVerify.test b/lldb/test/Shell/Reproducer/TestVerify.test new file mode 100644 index 0000000000000..0b34e62aab558 --- /dev/null +++ b/lldb/test/Shell/Reproducer/TestVerify.test @@ -0,0 +1,27 @@ +# RUN: rm -rf %t.repro +# RUN: rm -rf %t.repro2 +# RUN: %clang_host %S/Inputs/simple.c -g -o %t.out +# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteCapture.in --capture --capture-path %t.repro %t.out +# RUN: %lldb --replay %t.repro + +# RUN: echo "/bogus/home/dir" > %t.repro/home.txt +# RUN: echo "/bogus/current/working/dir" > %t.repro/cwd.txt + +# RUN: not %lldb -b -o 'reproducer verify -f %t.repro' 2>&1 | FileCheck %s +# CHECK: working directory '/bogus/current/working/dir' not in VFS +# CHECK: home directory '/bogus/home/dir' not in VFS + +# RUN: rm %t.repro/root/%S/Inputs/GDBRemoteCapture.in +# RUN: echo "CHECK: '%S/Inputs/GDBRemoteCapture.in': No such file or directory" > %t.check +# RUN: not %lldb -b -o 'reproducer verify -f %t.repro' 2>&1 | FileCheck %t.check + +# RUN: not %lldb --replay %t.repro 2>&1 | FileCheck %s + +# At this point the reproducer is too broken to ignore the verification issues. 
+# Capture a new reproducer and only change the home directory, which is +# recoverable as far as this test goes. + +# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteCapture.in --capture --capture-path %t.repro2 %t.out +# RUN: echo "/bogus/home/dir" > %t.repro2/home.txt +# RUN: %lldb --replay %t.repro2 --reproducer-no-verify 2>&1 | FileCheck %s --check-prefix NO-VERIFY +# NO-VERIFY-NOT: home directory '/bogus/home/dir' not in VFS diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index a02d94c30aa3a..1eb03fa43deb0 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -6,10 +6,10 @@ # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out -# CHECK: [ 1] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 -# CHECK: [ 2] 0 X Code 0x00000000004000d0 0x0000000000000022 0x00000000 _start -# CHECK: [ 3] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only -# CHECK: [ 4] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func +# CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func +# CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only +# CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 +# CHECK: [ 4] 0 X Code 0x00000000004000d0 0x0000000000000022 0x00000000 _start # CHECK-LABEL: (lldb) image lookup -a 0x4000b0 -v # CHECK: Address: symtab.out[0x00000000004000b0] (symtab.out.PT_LOAD[0]..text2 + 0) diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 5e2512731f39c..b66cc8f583e8e 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -3066,7 +3066,7 
@@ rnb_err_t RNBRemote::HandlePacket_last_signal(const char *unused) { WEXITSTATUS(pid_status)); else if (WIFSIGNALED(pid_status)) snprintf(pid_exited_packet, sizeof(pid_exited_packet), "X%02x", - WEXITSTATUS(pid_status)); + WTERMSIG(pid_status)); else if (WIFSTOPPED(pid_status)) snprintf(pid_exited_packet, sizeof(pid_exited_packet), "S%02x", WSTOPSIG(pid_status)); diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index 3837d06ed8d81..79720ddd1bf60 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -800,9 +800,11 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) { llvm::Optional InitializeReproducer(llvm::StringRef argv0, opt::InputArgList &input_args) { if (auto *replay_path = input_args.getLastArg(OPT_replay)) { - const bool no_version_check = input_args.hasArg(OPT_no_version_check); + SBReplayOptions replay_options; + replay_options.SetCheckVersion(!input_args.hasArg(OPT_no_version_check)); + replay_options.SetVerify(!input_args.hasArg(OPT_no_verification)); if (const char *error = - SBReproducer::Replay(replay_path->getValue(), no_version_check)) { + SBReproducer::Replay(replay_path->getValue(), replay_options)) { WithColor::error() << "reproducer replay failed: " << error << '\n'; return 1; } diff --git a/lldb/tools/driver/Options.td b/lldb/tools/driver/Options.td index 96f696ec3ca6e..b3ffc2d694eff 100644 --- a/lldb/tools/driver/Options.td +++ b/lldb/tools/driver/Options.td @@ -234,6 +234,8 @@ def replay: Separate<["--", "-"], "replay">, HelpText<"Tells the debugger to replay a reproducer from .">; def no_version_check: F<"reproducer-no-version-check">, HelpText<"Disable the reproducer version check.">; +def no_verification: F<"reproducer-no-verify">, + HelpText<"Disable the reproducer verification.">; def no_generate_on_signal: F<"reproducer-no-generate-on-signal">, HelpText<"Don't generate reproducer when a signal is received.">; def generate_on_exit: F<"reproducer-generate-on-exit">, 
diff --git a/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp index 1612babb07e87..e4574bebbcb8c 100644 --- a/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp @@ -1335,23 +1335,24 @@ void OCL20ToSPIRV::visitCallRelational(CallInst *CI, StringRef DemangledName) { if (CI->getOperand(0)->getType()->isVectorTy()) Ret = FixedVectorType::get( Type::getInt1Ty(*Ctx), - cast(CI->getOperand(0)->getType())->getNumElements()); + cast(CI->getOperand(0)->getType()) + ->getNumElements()); return SPIRVName; }, [=](CallInst *NewCI) -> Instruction * { Value *False = nullptr, *True = nullptr; if (NewCI->getType()->isVectorTy()) { Type *IntTy = Type::getInt32Ty(*Ctx); - if (cast(NewCI->getOperand(0)->getType()) + if (cast(NewCI->getOperand(0)->getType()) ->getElementType() ->isDoubleTy()) IntTy = Type::getInt64Ty(*Ctx); - if (cast(NewCI->getOperand(0)->getType()) + if (cast(NewCI->getOperand(0)->getType()) ->getElementType() ->isHalfTy()) IntTy = Type::getInt16Ty(*Ctx); Type *VTy = FixedVectorType::get( - IntTy, cast(NewCI->getType())->getNumElements()); + IntTy, cast(NewCI->getType())->getNumElements()); False = Constant::getNullValue(VTy); True = Constant::getAllOnesValue(VTy); } else { @@ -1618,7 +1619,7 @@ static void processSubgroupBlockReadWriteINTEL(CallInst *CI, OCLBuiltinTransInfo &Info, const Type *DataTy, Module *M) { unsigned VectorNumElements = 1; - if (auto *VecTy = dyn_cast(DataTy)) + if (auto *VecTy = dyn_cast(DataTy)) VectorNumElements = VecTy->getNumElements(); unsigned ElementBitSize = DataTy->getScalarSizeInBits(); Info.Postfix = "_"; diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index cb230ce01f528..749bf53abe835 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -805,7 +805,7 @@ unsigned encodeVecTypeHint(Type *Ty) { llvm_unreachable("invalid integer type"); } } - if (VectorType *VecTy = dyn_cast(Ty)) { + if (FixedVectorType *VecTy 
= dyn_cast(Ty)) { Type *EleTy = VecTy->getElementType(); unsigned Size = VecTy->getNumElements(); return Size << 16 | encodeVecTypeHint(EleTy); diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index c0eab091f3bed..9781d595eadef 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -336,7 +336,7 @@ bool SPIRVToLLVM::transOCLBuiltinFromVariable(GlobalVariable *GV, std::vector Vectors; Loads.push_back(LD); if (HasIndexArg) { - auto *VecTy = cast( + auto *VecTy = cast( LD->getPointerOperandType()->getPointerElementType()); Value *EmptyVec = UndefValue::get(VecTy); Vectors.push_back(EmptyVec); @@ -1937,7 +1937,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, IRBuilder<> Builder(BB); auto Scalar = transValue(VTS->getScalar(), F, BB); auto Vector = transValue(VTS->getVector(), F, BB); - auto *VecTy = cast(Vector->getType()); + auto *VecTy = cast(Vector->getType()); unsigned VecSize = VecTy->getNumElements(); auto NewVec = Builder.CreateVectorSplat(VecSize, Scalar, Scalar->getName()); NewVec->takeName(Scalar); @@ -1965,8 +1965,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, unsigned M = Mat->getType()->getArrayNumElements(); - auto *VecTy = cast(Vec->getType()); - VectorType *VTy = FixedVectorType::get(VecTy->getElementType(), M); + auto *VecTy = cast(Vec->getType()); + FixedVectorType *VTy = FixedVectorType::get(VecTy->getElementType(), M); auto ETy = VTy->getElementType(); unsigned N = VecTy->getNumElements(); Value *V = Builder.CreateVectorSplat(M, ConstantFP::get(ETy, 0.0)); @@ -1994,7 +1994,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, auto Matrix = transValue(MTS->getMatrix(), F, BB); uint64_t ColNum = Matrix->getType()->getArrayNumElements(); auto ColType = cast(Matrix->getType())->getElementType(); - auto VecSize = cast(ColType)->getNumElements(); + auto VecSize = 
cast(ColType)->getNumElements(); auto NewVec = Builder.CreateVectorSplat(VecSize, Scalar, Scalar->getName()); NewVec->takeName(Scalar); @@ -2031,8 +2031,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, // where sum is defined as vector sum. unsigned M = Mat->getType()->getArrayNumElements(); - VectorType *VTy = - cast(cast(Mat->getType())->getElementType()); + FixedVectorType *VTy = cast( + cast(Mat->getType())->getElementType()); unsigned N = VTy->getNumElements(); auto ETy = VTy->getElementType(); Value *V = Builder.CreateVectorSplat(N, ConstantFP::get(ETy, 0.0)); @@ -2086,10 +2086,10 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, unsigned C1 = M1->getType()->getArrayNumElements(); unsigned C2 = M2->getType()->getArrayNumElements(); - VectorType *V1Ty = - cast(cast(M1->getType())->getElementType()); - VectorType *V2Ty = - cast(cast(M2->getType())->getElementType()); + FixedVectorType *V1Ty = + cast(cast(M1->getType())->getElementType()); + FixedVectorType *V2Ty = + cast(cast(M2->getType())->getElementType()); unsigned R1 = V1Ty->getNumElements(); unsigned R2 = V2Ty->getNumElements(); auto ETy = V1Ty->getElementType(); @@ -2127,8 +2127,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, IRBuilder<> Builder(BB); auto Matrix = transValue(TR->getMatrix(), F, BB); unsigned ColNum = Matrix->getType()->getArrayNumElements(); - VectorType *ColTy = - cast(cast(Matrix->getType())->getElementType()); + FixedVectorType *ColTy = cast( + cast(Matrix->getType())->getElementType()); unsigned RowNum = ColTy->getNumElements(); auto VTy = FixedVectorType::get(ColTy->getElementType(), ColNum); @@ -4268,7 +4268,7 @@ Instruction *SPIRVToLLVM::transOCLAllAny(SPIRVInstruction *I, BasicBlock *BB) { auto OldArg = CI->getOperand(0); auto NewArgTy = FixedVectorType::get( Int32Ty, - cast(OldArg->getType())->getNumElements()); + cast(OldArg->getType())->getNumElements()); auto NewArg = 
CastInst::CreateSExtOrBitCast(OldArg, NewArgTy, "", CI); Args[0] = NewArg; @@ -4294,16 +4294,17 @@ Instruction *SPIRVToLLVM::transOCLRelational(SPIRVInstruction *I, Type *IntTy = Type::getInt32Ty(*Context); RetTy = IntTy; if (CI->getType()->isVectorTy()) { - if (cast(CI->getOperand(0)->getType()) + if (cast(CI->getOperand(0)->getType()) ->getElementType() ->isDoubleTy()) IntTy = Type::getInt64Ty(*Context); - if (cast(CI->getOperand(0)->getType()) + if (cast(CI->getOperand(0)->getType()) ->getElementType() ->isHalfTy()) IntTy = Type::getInt16Ty(*Context); RetTy = FixedVectorType::get( - IntTy, cast(CI->getType())->getNumElements()); + IntTy, + cast(CI->getType())->getNumElements()); } return CI->getCalledFunction()->getName().str(); }, @@ -4312,7 +4313,7 @@ Instruction *SPIRVToLLVM::transOCLRelational(SPIRVInstruction *I, if (NewCI->getType()->isVectorTy()) RetTy = FixedVectorType::get( Type::getInt1Ty(*Context), - cast(NewCI->getType())->getNumElements()); + cast(NewCI->getType())->getNumElements()); return CastInst::CreateTruncOrBitCast(NewCI, RetTy, "", NewCI->getNextNode()); }, diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp index 693faca282be3..7fd560e0dfb59 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp @@ -190,13 +190,13 @@ void SPIRVToOCL::visitCallSPRIVImageQuerySize(CallInst *CI) { GetImageSize, FixedVectorType::get( CI->getType()->getScalarType(), - cast(GetImageSize->getType())->getNumElements()), + cast(GetImageSize->getType())->getNumElements()), false, CI->getName(), CI); } } if (ImgArray || ImgDim == 3) { - auto *VecTy = cast(CI->getType()); + auto *VecTy = cast(CI->getType()); const unsigned ImgQuerySizeRetEls = VecTy->getNumElements(); if (ImgDim == 1) { @@ -224,7 +224,7 @@ void SPIRVToOCL::visitCallSPRIVImageQuerySize(CallInst *CI) { if (ImgArray) { assert((ImgDim == 1 || ImgDim == 2) && "invalid image array type"); // Insert get_image_array_size to the last 
position of the resulting vector. - auto *VecTy = cast(CI->getType()); + auto *VecTy = cast(CI->getType()); Type *SizeTy = Type::getIntNTy(*Ctx, M->getDataLayout().getPointerSizeInBits(0)); Instruction *GetImageArraySize = addCallInst( @@ -482,7 +482,7 @@ void SPIRVToOCL::visitCallSPIRVImageMediaBlockBuiltin(CallInst *CI, Op OC) { else assert(0 && "Unsupported texel type!"); - if (auto *VecTy = dyn_cast(RetType)) { + if (auto *VecTy = dyn_cast(RetType)) { unsigned int NumEl = VecTy->getNumElements(); assert((NumEl == 2 || NumEl == 4 || NumEl == 8 || NumEl == 16) && "Wrong function type!"); diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index 4c3c760f2484b..77f0f7da8b71a 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -141,7 +141,7 @@ std::string mapLLVMTypeToOCLType(const Type *Ty, bool Signed) { } return SignPrefix + Stem; } - if (auto VecTy = dyn_cast(Ty)) { + if (auto VecTy = dyn_cast(Ty)) { Type *EleTy = VecTy->getElementType(); unsigned Size = VecTy->getNumElements(); std::stringstream Ss; @@ -740,7 +740,7 @@ void makeVector(Instruction *InsPos, std::vector &Ops, void expandVector(Instruction *InsPos, std::vector &Ops, size_t VecPos) { auto Vec = Ops[VecPos]; - auto *VT = dyn_cast(Vec->getType()); + auto *VT = dyn_cast(Vec->getType()); if (!VT) return; size_t N = VT->getNumElements(); @@ -1047,7 +1047,7 @@ static SPIR::RefParamType transTypeDesc(Type *Ty, return SPIR::RefParamType(new SPIR::PrimitiveType(SPIR::PRIMITIVE_FLOAT)); if (Ty->isDoubleTy()) return SPIR::RefParamType(new SPIR::PrimitiveType(SPIR::PRIMITIVE_DOUBLE)); - if (auto *VecTy = dyn_cast(Ty)) { + if (auto *VecTy = dyn_cast(Ty)) { return SPIR::RefParamType(new SPIR::VectorType( transTypeDesc(VecTy->getElementType(), Info), VecTy->getNumElements())); } @@ -1161,7 +1161,7 @@ Value *getScalarOrArray(Value *V, unsigned Size, Instruction *Pos) { Constant *getScalarOrVectorConstantInt(Type *T, uint64_t V, bool IsSigned) 
{ if (auto IT = dyn_cast(T)) return ConstantInt::get(IT, V); - if (auto VT = dyn_cast(T)) { + if (auto VT = dyn_cast(T)) { std::vector EV( VT->getNumElements(), getScalarOrVectorConstantInt(VT->getElementType(), V, IsSigned)); @@ -1538,7 +1538,7 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { if (II->getArgOperand(0)->getType() != Ty) return false; int NumElems = 1; - if (auto *VecTy = dyn_cast(Ty)) { + if (auto *VecTy = dyn_cast(Ty)) { NumElems = VecTy->getNumElements(); Ty = VecTy->getElementType(); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 7c9d6da416604..0b38cf8164e69 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -376,7 +376,7 @@ SPIRVType *LLVMToSPIRV::transType(Type *T) { } } - if (auto *VecTy = dyn_cast(T)) + if (auto *VecTy = dyn_cast(T)) return mapType(T, BM->addVectorType(transType(VecTy->getElementType()), VecTy->getNumElements())); @@ -1571,8 +1571,7 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, if (Instruction *Inst = dyn_cast(V)) { BM->getErrorLog().checkError(false, SPIRVEC_InvalidInstruction, - toString(Inst) + "\n", nullptr, __FILE__, - __LINE__); + toString(Inst) + "\n", "", __FILE__, __LINE__); } llvm_unreachable("Not implemented"); @@ -1957,6 +1956,21 @@ bool LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::expect: + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_uitofp: + case Intrinsic::experimental_constrained_sitofp: + case 
Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: + case Intrinsic::experimental_constrained_fmuladd: case Intrinsic::fmuladd: case Intrinsic::memset: case Intrinsic::memcpy: @@ -1978,6 +1992,24 @@ bool LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { } } +// Performs mapping of LLVM IR rounding mode to SPIR-V rounding mode +// Value *V is metadata argument of +// llvm.experimental.constrained.* intrinsics +SPIRVInstruction * +LLVMToSPIRV::applyRoundingModeConstraint(Value *V, SPIRVInstruction *I) { + StringRef RMode = + cast(cast(V)->getMetadata())->getString(); + if (RMode.endswith("tonearest")) + I->addFPRoundingMode(FPRoundingModeRTE); + else if (RMode.endswith("towardzero")) + I->addFPRoundingMode(FPRoundingModeRTZ); + else if (RMode.endswith("upward")) + I->addFPRoundingMode(FPRoundingModeRTP); + else if (RMode.endswith("downward")) + I->addFPRoundingMode(FPRoundingModeRTN); + return I; +} + SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, SPIRVBasicBlock *BB) { auto GetMemoryAccess = [](MemIntrinsic *MI) -> std::vector { @@ -2065,6 +2097,105 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, } return Value; } + case Intrinsic::experimental_constrained_fadd: { + auto BI = BM->addBinaryInst(OpFAdd, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fsub: { + auto BI = BM->addBinaryInst(OpFSub, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fmul: { + auto BI = BM->addBinaryInst(OpFMul, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + 
transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fdiv: { + auto BI = BM->addBinaryInst(OpFDiv, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_frem: { + auto BI = BM->addBinaryInst(OpFRem, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fma: { + std::vector Args{transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), + transValue(II->getArgOperand(2), BB)}; + auto BI = BM->addExtInst(transType(II->getType()), + BM->getExtInstSetId(SPIRVEIS_OpenCL), + OpenCLLIB::Fma, Args, BB); + return applyRoundingModeConstraint(II->getOperand(3), BI); + } + case Intrinsic::experimental_constrained_fptoui: { + return BM->addUnaryInst(OpConvertFToU, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } + case Intrinsic::experimental_constrained_fptosi: { + return BM->addUnaryInst(OpConvertFToS, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } + case Intrinsic::experimental_constrained_uitofp: { + auto BI = BM->addUnaryInst(OpConvertUToF, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + return applyRoundingModeConstraint(II->getOperand(1), BI); + } + case Intrinsic::experimental_constrained_sitofp: { + auto BI = BM->addUnaryInst(OpConvertSToF, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + return applyRoundingModeConstraint(II->getOperand(1), BI); + } + case Intrinsic::experimental_constrained_fpext: { + return BM->addUnaryInst(OpFConvert, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } + case 
Intrinsic::experimental_constrained_fptrunc: { + auto BI = BM->addUnaryInst(OpFConvert, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + return applyRoundingModeConstraint(II->getOperand(1), BI); + } + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: { + auto MetaMod = cast(II->getOperand(2))->getMetadata(); + Op CmpTypeOp = StringSwitch(cast(MetaMod)->getString()) + .Case("oeq", OpFOrdEqual) + .Case("ogt", OpFOrdGreaterThan) + .Case("oge", OpFOrdGreaterThanEqual) + .Case("olt", OpFOrdLessThan) + .Case("ole", OpFOrdLessThanEqual) + .Case("one", OpFOrdNotEqual) + .Case("ord", OpOrdered) + .Case("ueq", OpFUnordEqual) + .Case("ugt", OpFUnordGreaterThan) + .Case("uge", OpFUnordGreaterThanEqual) + .Case("ult", OpFUnordLessThan) + .Case("ule", OpFUnordLessThanEqual) + .Case("une", OpFUnordNotEqual) + .Case("uno", OpUnordered) + .Default(OpNop); + assert(CmpTypeOp != OpNop && "Invalid condition code!"); + return BM->addCmpInst(CmpTypeOp, transType(II->getType()), + transValue(II->getOperand(0), BB), + transValue(II->getOperand(1), BB), BB); + } + case Intrinsic::experimental_constrained_fmuladd: { + SPIRVType *Ty = transType(II->getType()); + SPIRVValue *Mul = + BM->addBinaryInst(OpFMul, Ty, transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + auto BI = BM->addBinaryInst(OpFAdd, Ty, Mul, + transValue(II->getArgOperand(2), BB), BB); + return applyRoundingModeConstraint(II->getOperand(3), BI); + } case Intrinsic::fmuladd: { // For llvm.fmuladd.* fusion is not guaranteed. 
If a fused multiply-add // is required the corresponding llvm.fma.* intrinsic function should be @@ -2832,17 +2963,26 @@ SPIRVInstruction *LLVMToSPIRV::transBuiltinToInst(StringRef DemangledName, !BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_blocking_pipes)) return nullptr; - if (OpFixedSqrtINTEL <= OC && OC <= OpFixedExpINTEL && - !BM->isAllowedToUseExtension( - ExtensionID::SPV_INTEL_arbitrary_precision_fixed_point)) - return nullptr; - - if (((OpArbitraryFloatSinCosPiINTEL <= OC && - OC <= OpArbitraryFloatCastToIntINTEL) || - (OpArbitraryFloatAddINTEL <= OC && OC <= OpArbitraryFloatPowNINTEL)) && - !BM->isAllowedToUseExtension( - ExtensionID::SPV_INTEL_arbitrary_precision_floating_point)) - return nullptr; + if (OpFixedSqrtINTEL <= OC && OC <= OpFixedExpINTEL) + BM->getErrorLog().checkError( + BM->isAllowedToUseExtension( + ExtensionID::SPV_INTEL_arbitrary_precision_fixed_point), + SPIRVEC_InvalidInstruction, + CI->getCalledOperand()->getName().str() + + "\nFixed point instructions can't be translated correctly without " + "enabled SPV_INTEL_arbitrary_precision_fixed_point extension!\n"); + + if ((OpArbitraryFloatSinCosPiINTEL <= OC && + OC <= OpArbitraryFloatCastToIntINTEL) || + (OpArbitraryFloatAddINTEL <= OC && OC <= OpArbitraryFloatPowNINTEL)) + BM->getErrorLog().checkError( + BM->isAllowedToUseExtension( + ExtensionID::SPV_INTEL_arbitrary_precision_floating_point), + SPIRVEC_InvalidInstruction, + CI->getCalledOperand()->getName().str() + + "\nFloating point instructions can't be translated correctly " + "without enabled SPV_INTEL_arbitrary_precision_floating_point " + "extension!\n"); auto Inst = transBuiltinToInstWithoutDecoration(OC, CI, BB); addDecorations(Inst, Dec); @@ -3323,7 +3463,7 @@ LLVMToSPIRV::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI, auto IsVector = ResultTy->isVectorTy(); if (IsVector) BoolTy = FixedVectorType::get( - BoolTy, cast(ResultTy)->getNumElements()); + BoolTy, cast(ResultTy)->getNumElements()); auto BBT = 
transType(BoolTy); SPIRVInstruction *Res; if (isCmpOpCode(OC)) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.h b/llvm-spirv/lib/SPIRV/SPIRVWriter.h index 0dc93519b21f4..40f8b991324b6 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.h @@ -133,6 +133,9 @@ class LLVMToSPIRV : public ModulePass { FuncTransMode FuncTrans = FuncTransMode::Decl); void transGlobalIOPipeStorage(GlobalVariable *V, MDNode *IO); + static SPIRVInstruction *applyRoundingModeConstraint(Value *V, + SPIRVInstruction *I); + typedef DenseMap LLVMToSPIRVTypeMap; typedef DenseMap LLVMToSPIRVValueMap; typedef DenseMap LLVMToSPIRVMetadataMap; diff --git a/llvm-spirv/test/constrained_arifm_intr.ll b/llvm-spirv/test/constrained_arifm_intr.ll new file mode 100644 index 0000000000000..4ccce47ce893f --- /dev/null +++ b/llvm-spirv/test/constrained_arifm_intr.ll @@ -0,0 +1,87 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + + +; CHECK: Name [[ad:[0-9]+]] "add" +; CHECK: Name [[di:[0-9]+]] "div" +; CHECK: Name [[su:[0-9]+]] "sub" +; CHECK: Name [[mu:[0-9]+]] "mul" + +; CHECK-NOT: Decorate {{[0-9]+}} FPRoundingMode + +; CHECK: Decorate [[ad]] FPRoundingMode 0 +; CHECK: Decorate [[di]] FPRoundingMode 1 +; CHECK: Decorate [[su]] FPRoundingMode 2 +; CHECK: Decorate [[mu]] FPRoundingMode 3 + +; CHECK-NOT: Decorate {{[0-9]+}} FPRoundingMode + +; CHECK: FAdd {{[0-9]+}} [[ad]] +; CHECK: FDiv {{[0-9]+}} [[di]] +; CHECK: FSub {{[0-9]+}} [[su]] +; CHECK: FMul {{[0-9]+}} [[mu]] +; CHECK: FMul +; CHECK: FAdd +; CHECK: ExtInst {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} fma +; CHECK: FRem + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind strictfp +define dso_local spir_kernel void @test(float %a, i32 %in, i32 %ui) local_unnamed_addr #0 
!kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 { +entry: + %add = tail call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #2 + %div = tail call float @llvm.experimental.constrained.fdiv.f32(float %add, float %add, metadata !"round.towardzero", metadata !"fpexcept.strict") #2, !fpmath !10 + %sub = tail call float @llvm.experimental.constrained.fsub.f32(float %div, float %div, metadata !"round.upward", metadata !"fpexcept.strict") #2 + %mul = tail call float @llvm.experimental.constrained.fmul.f32(float %sub, float %sub, metadata !"round.downward", metadata !"fpexcept.strict") #2 + %0 = tail call float @llvm.experimental.constrained.fmuladd.f32(float %mul, float %mul, float %mul, metadata !"round.tonearestaway", metadata !"fpexcept.strict") #2 + %1 = tail call float @llvm.experimental.constrained.fma.f32(float %0, float %0, float %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #2 + %2 = tail call float @llvm.experimental.constrained.frem.f32(float %1, float %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, 
metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1 + +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inaccessiblememonly nounwind willreturn } +attributes #2 = { strictfp } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"} +!5 = !{i32 0, i32 0, i32 0} +!6 = !{!"none", !"none", !"none"} +!7 = !{!"float", !"int", !"uint"} +!8 = !{!"", !"", !""} +!9 = !{i32 -1, i32 -1, i32 -1} +!10 = !{float 2.500000e+00} diff --git a/llvm-spirv/test/constrained_cmp_intr.ll b/llvm-spirv/test/constrained_cmp_intr.ll new file mode 100644 index 0000000000000..bb34d32ff41d8 --- /dev/null +++ b/llvm-spirv/test/constrained_cmp_intr.ll @@ -0,0 +1,69 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +;CHECK: FOrdEqual +;CHECK: FOrdGreaterThan +;CHECK: FOrdGreaterThanEqual +;CHECK: FOrdLessThan +;CHECK: FOrdLessThanEqual +;CHECK: FOrdNotEqual +;CHECK: 
Ordered +;CHECK: FUnordEqual +;CHECK: FUnordGreaterThan +;CHECK: FUnordGreaterThanEqual +;CHECK: FUnordLessThan +;CHECK: FUnordLessThanEqual +;CHECK: FUnordNotEqual +;CHECK: Unordered + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind strictfp +define dso_local spir_kernel void @test(float %a) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 { +entry: + %cmp = tail call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %a, metadata !"oeq", metadata !"fpexcept.strict") #2 + %cmp1 = tail call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %a, metadata !"ogt", metadata !"fpexcept.strict") #2 + %cmp2 = tail call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %a, metadata !"oge", metadata !"fpexcept.strict") #2 + %cmp3 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"olt", metadata !"fpexcept.strict") #2 + %cmp4 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ole", metadata !"fpexcept.strict") #2 + %cmp5 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"one", metadata !"fpexcept.strict") #2 + %cmp6 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ord", metadata !"fpexcept.strict") #2 + %cmp7 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ueq", metadata !"fpexcept.strict") #2 + %cmp8 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ugt", metadata !"fpexcept.strict") #2 + %cmp9 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"uge", metadata !"fpexcept.strict") #2 + %cmp10 = tail call i1 
@llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ult", metadata !"fpexcept.strict") #2 + %cmp11 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ule", metadata !"fpexcept.strict") #2 + %cmp12 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"une", metadata !"fpexcept.strict") #2 + %cmp13 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"uno", metadata !"fpexcept.strict") #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) #1 + +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inaccessiblememonly nounwind willreturn } +attributes #2 = { strictfp } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"} +!5 = !{i32 0} +!6 = !{!"none"} +!7 = !{!"float"} +!8 = !{!""} +!9 = !{i32 -1} diff --git a/llvm-spirv/test/constrained_convert_intr.ll b/llvm-spirv/test/constrained_convert_intr.ll new file mode 100644 index 
0000000000000..7547bae2df6a9 --- /dev/null +++ b/llvm-spirv/test/constrained_convert_intr.ll @@ -0,0 +1,81 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +; CHECK: Name [[sf:[0-9]+]] "conv" +; CHECK: Name [[uf:[0-9]+]] "conv1" +; CHECK: Name [[fs:[0-9]+]] "conv2" +; CHECK: Name [[fu:[0-9]+]] "conv3" +; CHECK: Name [[fe:[0-9]+]] "conv4" +; CHECK: Name [[ft:[0-9]+]] "conv5" + +; CHECK: Decorate [[sf]] FPRoundingMode 0 +; CHECK: Decorate [[uf]] FPRoundingMode 1 + +; CHECK-NOT: Decorate [[fs]] FPRoundingMode +; CHECK-NOT: Decorate [[fu]] FPRoundingMode +; CHECK-NOT: Decorate [[fe]] FPRoundingMode + +; CHECK: Decorate [[ft]] FPRoundingMode 2 + +;CHECK: ConvertSToF {{[0-9]+}} [[sf]] +;CHECK: ConvertUToF {{[0-9]+}} [[uf]] +;CHECK: ConvertFToS {{[0-9]+}} [[fs]] +;CHECK: ConvertFToU {{[0-9]+}} [[fu]] +;CHECK: FConvert {{[0-9]+}} [[fe]] +;CHECK: FConvert {{[0-9]+}} [[ft]] + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind strictfp +define dso_local spir_kernel void @test(float %a, i32 %in, i32 %ui) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 { +entry: + %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %in, metadata !"round.tonearest", metadata !"fpexcept.ignore") #2 + %conv1 = tail call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %ui, metadata !"round.towardzero", metadata !"fpexcept.ignore") #2 + %conv2 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %conv1, metadata !"fpexcept.ignore") #2 + %conv3 = tail call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %conv1, metadata !"fpexcept.ignore") #2 + %conv4 = tail call 
double @llvm.experimental.constrained.fpext.f64.f32(float %conv1, metadata !"fpexcept.ignore") #2 + %conv5 = tail call float @llvm.experimental.constrained.fptrunc.f32.f64(double %conv4, metadata !"round.upward", metadata !"fpexcept.ignore") #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1 + +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inaccessiblememonly nounwind willreturn } +attributes #2 = { strictfp } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 
(https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"} +!5 = !{i32 0, i32 0, i32 0} +!6 = !{!"none", !"none", !"none"} +!7 = !{!"float", !"int", !"uint"} +!8 = !{!"", !"", !""} +!9 = !{i32 -1, i32 -1, i32 -1} diff --git a/llvm-spirv/test/negative/unsup_invoke_instr.ll b/llvm-spirv/test/negative/unsup_invoke_instr.ll index f5d2632591277..1bc31610ccec2 100644 --- a/llvm-spirv/test/negative/unsup_invoke_instr.ll +++ b/llvm-spirv/test/negative/unsup_invoke_instr.ll @@ -1,4 +1,3 @@ -; XFAIL: windows ;Translator does not parse some llvm instructions ;and emit errror message in that case. ; RUN: llvm-as %s -o %t.bc diff --git a/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll b/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll index c13b30d8083e3..6611ae2adea92 100644 --- a/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll +++ b/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll @@ -95,7 +95,9 @@ ; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_INTEL_arbitrary_precision_fixed_point -o %t.spv ; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV-NEGATIVE +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR: Fixed point instructions can't be translated correctly without enabled SPV_INTEL_arbitrary_precision_fixed_point extension! 
; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM @@ -106,9 +108,6 @@ ; CHECK-SPIRV: 12 Extension "SPV_INTEL_arbitrary_precision_fixed_point" ; CHECK-SPIRV: 11 Extension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-SPIRV-NEGATIVE-NOT: 2 Capability ArbitraryPrecisionFixedPointINTEL -; CHECK-SPIRV-NEGATIVE-NOT: 12 Extension "SPV_INTEL_arbitrary_precision_fixed_point" - ; CHECK-SPIRV: 4 TypeInt [[Ty_8:[0-9]+]] 8 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_13:[0-9]+]] 13 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_5:[0-9]+]] 5 0 @@ -128,53 +127,40 @@ ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Sqrt_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_5]] [[#]] [[Sqrt_InId]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_5]] [[Sqrt_InId_B:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_13]] [[#]] [[Sqrt_InId_B]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_5]] [[Sqrt_InId_C:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_13]] [[#]] [[Sqrt_InId_C]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_3]] [[Recip_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedRecipINTEL [[Ty_8]] [[#]] [[Recip_InId]] 1 4 4 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedRecipINTEL ; CHECK-SPIRV: 6 Load [[Ty_11]] [[Rsqrt_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedRsqrtINTEL [[Ty_10]] [[#]] [[Rsqrt_InId]] 0 8 6 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedRsqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_17]] [[Sin_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinINTEL [[Ty_11]] [[#]] [[Sin_InId]] 1 7 5 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSinINTEL ; CHECK-SPIRV: 6 Load [[Ty_35]] [[Cos_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedCosINTEL [[Ty_28]] [[#]] [[Cos_InId]] 0 9 3 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedCosINTEL ; CHECK-SPIRV: 6 Load [[Ty_31]] [[SinCos_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinCosINTEL [[Ty_40]] [[#]] [[SinCos_InId]] 1 10 12 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 
FixedSinCosINTEL ; CHECK-SPIRV: 6 Load [[Ty_60]] [[SinPi_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinPiINTEL [[Ty_5]] [[#]] [[SinPi_InId]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSinPiINTEL ; CHECK-SPIRV: 6 Load [[Ty_28]] [[CosPi_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedCosPiINTEL [[Ty_16]] [[#]] [[CosPi_InId]] 0 8 5 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedCosPiINTEL ; CHECK-SPIRV: 6 Load [[Ty_13]] [[SinCosPi_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinCosPiINTEL [[Ty_10]] [[#]] [[SinCosPi_InId]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSinCosPiINTEL ; CHECK-SPIRV: 6 Load [[Ty_64]] [[Log_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedLogINTEL [[Ty_44]] [[#]] [[Log_InId]] 1 24 22 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedLogINTEL ; CHECK-SPIRV: 6 Load [[Ty_44]] [[Exp_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedExpINTEL [[Ty_34]] [[#]] [[Exp_InId]] 0 20 20 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedExpINTEL ; CHECK-LLVM: call i5 @intel_arbitrary_fixed_sqrt.i5.i13(i13 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0) ; CHECK-LLVM: call i13 @intel_arbitrary_fixed_sqrt.i13.i5(i5 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0) diff --git a/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll b/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll index 0e24d4f8186f1..b6a295c4839ce 100644 --- a/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll +++ b/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll @@ -403,7 +403,9 @@ ; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_INTEL_arbitrary_precision_floating_point -o %t.spv ; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV-NEGATIVE +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - 2>&1 | FileCheck 
%s --check-prefix=CHECK-ERROR +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR: Floating point instructions can't be translated correctly without enabled SPV_INTEL_arbitrary_precision_floating_point extension! ; RUN: llvm-spirv -r %t.spv -o %t.r.bc ; RUN: llvm-dis < %t.r.bc | FileCheck %s --check-prefix=CHECK-LLVM @@ -414,9 +416,6 @@ ; CHECK-SPIRV: 13 Extension "SPV_INTEL_arbitrary_precision_floating_point" ; CHECK-SPIRV: 11 Extension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] Capability ArbitraryPrecisionFloatingPointINTEL -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] Extension "SPV_INTEL_arbitrary_precision_floating_point" - ; CHECK-SPIRV: 4 TypeInt [[Ty_8:[0-9]+]] 8 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_40:[0-9]+]] 40 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_43:[0-9]+]] 43 0 @@ -549,7 +548,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_castILi11ELi28ELi9ELi %6 = call spir_func i40 @_Z31__spirv_ArbitraryFloatCastINTELILi40ELi40EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i40 %5, i32 28, i32 30, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_40]] [[Cast_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCastINTEL [[Ty_40]] [[#]] [[Cast_AId]] 28 30 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCastINTEL ; CHECK-LLVM: call i40 @intel_arbitrary_float_cast.i40.i40(i40 %[[#]], i32 28, i32 30, i32 0, i32 2, i32 1) store i40 %6, i40* %2, align 8, !tbaa !9 %7 = bitcast i40* %2 to i8* @@ -571,7 +569,6 @@ define linkonce_odr dso_local spir_func void @_Z22ap_float_cast_from_intILi43ELi %6 = call spir_func signext i25 @_Z38__spirv_ArbitraryFloatCastFromIntINTELILi43ELi25EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiii(i43 %5, i32 16, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_43]] [[CastFromInt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 8 ArbitraryFloatCastFromIntINTEL [[Ty_25]] [[#]] [[CastFromInt_AId]] 16 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCastFromIntINTEL ; CHECK-LLVM: call i25 
@intel_arbitrary_float_cast_from_int.i25.i43(i43 %[[#]], i32 16, i32 0, i32 2, i32 1) store i25 %6, i25* %2, align 4, !tbaa !13 %7 = bitcast i25* %2 to i8* @@ -593,7 +590,6 @@ define linkonce_odr dso_local spir_func void @_Z20ap_float_cast_to_intILi7ELi15E %6 = call spir_func signext i30 @_Z36__spirv_ArbitraryFloatCastToIntINTELILi23ELi30EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiii(i23 signext %5, i32 15, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_23]] [[CastToInt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 8 ArbitraryFloatCastToIntINTEL [[Ty_30]] [[#]] [[CastToInt_AId]] 15 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCastToIntINTEL ; CHECK-LLVM: call i30 @intel_arbitrary_float_cast_to_int.i30.i23(i23 %[[#]], i32 15, i32 0, i32 2, i32 1) store i30 %6, i30* %2, align 4, !tbaa !17 %7 = bitcast i30* %2 to i8* @@ -627,7 +623,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_addILi5ELi7ELi6ELi8EL ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Add1_A1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_15]] [[Add1_B1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_14]] [[#]] [[Add1_A1Id]] 7 [[Add1_B1Id]] 8 9 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i14 @intel_arbitrary_float_add.i14.i13.i15(i13 %[[#]], i32 7, i15 %[[#]], i32 8, i32 9, i32 0, i32 2, i32 1) store i14 %14, i14* %5, align 2, !tbaa !23 %15 = bitcast i14* %6 to i8* @@ -638,7 +633,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_addILi5ELi7ELi6ELi8EL ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Add1_A2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_15]] [[Add1_B2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_14]] [[#]] [[Add1_A2Id]] 7 [[Add1_B2Id]] 8 9 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i14 @intel_arbitrary_float_add.i14.i13.i15(i13 %[[#]], i32 7, i15 %[[#]], i32 8, i32 9, i32 0, i32 2, i32 1) store i14 %18, i14* %6, align 2, !tbaa !23 %19 = bitcast i14* %6 to i8* @@ -680,7 +674,6 @@ define 
linkonce_odr dso_local spir_func void @_Z12ap_float_addILi6ELi8ELi4ELi9EL ; CHECK-SPIRV: 6 Load [[Ty_15]] [[Add2_A1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_14]] [[Add2_B1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_13]] [[#]] [[Add2_A1Id]] 8 [[Add2_B1Id]] 9 7 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_add.i13.i15.i14(i15 %[[#]], i32 8, i14 %[[#]], i32 9, i32 7, i32 0, i32 2, i32 1) store i13 %14, i13* %5, align 2, !tbaa !19 %15 = bitcast i13* %6 to i8* @@ -691,7 +684,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_addILi6ELi8ELi4ELi9EL ; CHECK-SPIRV: 6 Load [[Ty_15]] [[Add2_A2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_14]] [[Add2_B2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_13]] [[#]] [[Add2_A2Id]] 8 [[Add2_B2Id]] 9 7 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_add.i13.i15.i14(i15 %[[#]], i32 8, i14 %[[#]], i32 9, i32 7, i32 0, i32 2, i32 1) store i13 %18, i13* %6, align 2, !tbaa !19 %19 = bitcast i13* %6 to i8* @@ -726,7 +718,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_subILi4ELi4ELi5ELi5EL ; CHECK-SPIRV: 6 Load [[Ty_9]] [[Sub_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_11]] [[Sub_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatSubINTEL [[Ty_13]] [[#]] [[Sub_AId]] 4 [[Sub_BId]] 5 6 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSubINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_sub.i13.i9.i11(i9 %[[#]], i32 4, i11 %[[#]], i32 5, i32 6, i32 0, i32 2, i32 1) store i13 %9, i13* %3, align 2, !tbaa !19 %10 = bitcast i13* %3 to i8* @@ -755,7 +746,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_mulILi16ELi34ELi16ELi ; CHECK-SPIRV: 6 Load [[Ty_51]] [[Mul_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_51]] [[Mul_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatMulINTEL [[Ty_51]] [[#]] [[Mul_AId]] 34 [[Mul_BId]] 34 34 0 2 1 -; 
CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatMulINTEL ; CHECK-LLVM: call i51 @intel_arbitrary_float_mul.i51.i51.i51(i51 %[[#]], i32 34, i51 %[[#]], i32 34, i32 34, i32 0, i32 2, i32 1) store i51 %9, i51* %3, align 8, !tbaa !29 %10 = bitcast i51* %3 to i8* @@ -784,7 +774,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_divILi4ELi11ELi4ELi11 ; CHECK-SPIRV: 6 Load [[Ty_16]] [[Div_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_16]] [[Div_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatDivINTEL [[Ty_18]] [[#]] [[Div_AId]] 11 [[Div_BId]] 11 12 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatDivINTEL ; CHECK-LLVM: call i18 @intel_arbitrary_float_div.i18.i16.i16(i16 %[[#]], i32 11, i16 %[[#]], i32 11, i32 12, i32 0, i32 2, i32 1) store i18 %9, i18* %3, align 4, !tbaa !33 %10 = bitcast i18* %3 to i8* @@ -812,7 +801,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_gtILi20ELi42ELi21ELi4 ; CHECK-SPIRV: 6 Load [[Ty_63]] [[GT_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_63]] [[GT_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatGTINTEL [[Ty_Bool]] [[#]] [[GT_AId]] 42 [[GT_BId]] 41 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatGTINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_gt.i1.i63.i63(i63 %[[#]], i32 42, i63 %[[#]], i32 41) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -840,7 +828,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_geILi19ELi27ELi19ELi2 ; CHECK-SPIRV: 6 Load [[Ty_47]] [[GE_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_47]] [[GE_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatGEINTEL [[Ty_Bool]] [[#]] [[GE_AId]] 27 [[GE_BId]] 27 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatGEINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_ge.i1.i47.i47(i47 %[[#]], i32 27, i47 %[[#]], i32 27) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -868,7 +855,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_ltILi2ELi2ELi3ELi3EEv ; CHECK-SPIRV: 6 Load [[Ty_5]] 
[[LT_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_7]] [[LT_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatLTINTEL [[Ty_Bool]] [[#]] [[LT_AId]] 2 [[LT_BId]] 3 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLTINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_lt.i1.i5.i7(i5 %[[#]], i32 2, i7 %[[#]], i32 3) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -896,7 +882,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_leILi27ELi27ELi26ELi2 ; CHECK-SPIRV: 6 Load [[Ty_55]] [[LE_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_55]] [[LE_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatLEINTEL [[Ty_Bool]] [[#]] [[LE_AId]] 27 [[LE_BId]] 28 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLEINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_le.i1.i55.i55(i55 %[[#]], i32 27, i55 %[[#]], i32 28) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -924,7 +909,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_eqILi7ELi12ELi7ELi7EE ; CHECK-SPIRV: 6 Load [[Ty_20]] [[EQ_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_15]] [[EQ_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatEQINTEL [[Ty_Bool]] [[#]] [[EQ_AId]] 12 [[EQ_BId]] 7 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatEQINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_eq.i1.i20.i15(i20 %[[#]], i32 12, i15 %[[#]], i32 7) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -948,7 +932,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_recipILi9ELi29ELi9ELi %6 = call spir_func i39 @_Z32__spirv_ArbitraryFloatRecipINTELILi39ELi39EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i39 %5, i32 29, i32 29, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_39]] [[Recip_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatRecipINTEL [[Ty_39]] [[#]] [[Recip_AId]] 29 29 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatRecipINTEL ; CHECK-LLVM: call i39 @intel_arbitrary_float_recip.i39.i39(i39 %[[#]], i32 29, i32 29, i32 0, i32 2, i32 1) store i39 %6, i39* %2, align 
8, !tbaa !49 %7 = bitcast i39* %2 to i8* @@ -970,7 +953,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_rsqrtILi12ELi19ELi13E %6 = call spir_func i34 @_Z32__spirv_ArbitraryFloatRSqrtINTELILi32ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i32 %5, i32 19, i32 20, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_32]] [[Rsqrt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatRSqrtINTEL [[Ty_34]] [[#]] [[Rsqrt_AId]] 19 20 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatRSqrtINTEL ; CHECK-LLVM: call i34 @intel_arbitrary_float_rsqrt.i34.i32(i32 %[[#]], i32 19, i32 20, i32 0, i32 2, i32 1) store i34 %6, i34* %2, align 8, !tbaa !53 %7 = bitcast i34* %2 to i8* @@ -992,7 +974,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_cbrtILi0ELi1ELi0ELi1E %6 = call spir_func signext i2 @_Z31__spirv_ArbitraryFloatCbrtINTELILi2ELi2EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i2 signext %5, i32 1, i32 1, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_2]] [[Cbrt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCbrtINTEL [[Ty_2]] [[#]] [[Cbrt_AId]] 1 1 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCbrtINTEL ; CHECK-LLVM: call i2 @intel_arbitrary_float_cbrt.i2.i2(i2 %[[#]], i32 1, i32 1, i32 0, i32 2, i32 1) store i2 %6, i2* %2, align 1, !tbaa !55 %7 = bitcast i2* %2 to i8* @@ -1019,7 +1000,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_hypotILi20ELi20ELi21E ; CHECK-SPIRV: 6 Load [[Ty_41]] [[Hypot_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_43]] [[Hypot_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatHypotINTEL [[Ty_42]] [[#]] [[Hypot_AId]] 20 [[Hypot_BId]] 21 22 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatHypotINTEL ; CHECK-LLVM: call i42 @intel_arbitrary_float_hypot.i42.i41.i43(i41 %[[#]], i32 20, i43 %[[#]], i32 21, i32 22, i32 0, i32 2, i32 1) store i42 %9, i42* %3, align 8, !tbaa !59 %10 = bitcast i42* %3 to i8* @@ -1043,7 +1023,6 @@ define linkonce_odr dso_local spir_func void 
@_Z13ap_float_sqrtILi7ELi7ELi8ELi8E %6 = call spir_func signext i17 @_Z31__spirv_ArbitraryFloatSqrtINTELILi15ELi17EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i15 signext %5, i32 7, i32 8, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_15]] [[Sqrt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSqrtINTEL [[Ty_17]] [[#]] [[Sqrt_AId]] 7 8 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSqrtINTEL ; CHECK-LLVM: call i17 @intel_arbitrary_float_sqrt.i17.i15(i15 %[[#]], i32 7, i32 8, i32 0, i32 2, i32 1) store i17 %6, i17* %2, align 4, !tbaa !61 %7 = bitcast i17* %2 to i8* @@ -1065,7 +1044,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_logILi30ELi19ELi19ELi %6 = call spir_func i50 @_Z30__spirv_ArbitraryFloatLogINTELILi50ELi50EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i50 %5, i32 19, i32 30, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_50]] [[Log_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLogINTEL [[Ty_50]] [[#]] [[Log_AId]] 19 30 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLogINTEL ; CHECK-LLVM: call i50 @intel_arbitrary_float_log.i50.i50(i50 %[[#]], i32 19, i32 30, i32 0, i32 2, i32 1) store i50 %6, i50* %2, align 8, !tbaa !63 %7 = bitcast i50* %2 to i8* @@ -1087,7 +1065,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_log2ILi17ELi20ELi18EL %6 = call spir_func i38 @_Z31__spirv_ArbitraryFloatLog2INTELILi38ELi38EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i38 %5, i32 20, i32 19, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_38]] [[Log2_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLog2INTEL [[Ty_38]] [[#]] [[Log2_AId]] 20 19 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLog2INTEL ; CHECK-LLVM: call i38 @intel_arbitrary_float_log2.i38.i38(i38 %[[#]], i32 20, i32 19, i32 0, i32 2, i32 1) store i38 %6, i38* %2, align 8, !tbaa !65 %7 = bitcast i38* %2 to i8* @@ -1108,7 +1085,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_log10ILi4ELi3ELi4ELi5 %5 = call spir_func signext i10 
@_Z32__spirv_ArbitraryFloatLog10INTELILi8ELi10EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i8 signext %4, i32 3, i32 5, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_8]] [[Log10_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLog10INTEL [[Ty_10]] [[#]] [[Log10_AId]] 3 5 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLog10INTEL ; CHECK-LLVM: call i10 @intel_arbitrary_float_log10.i10.i8(i8 %[[#]], i32 3, i32 5, i32 0, i32 2, i32 1) store i10 %5, i10* %2, align 2, !tbaa !69 %6 = bitcast i10* %2 to i8* @@ -1129,7 +1105,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_log1pILi17ELi30ELi18E %6 = call spir_func i49 @_Z32__spirv_ArbitraryFloatLog1pINTELILi48ELi49EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i48 %5, i32 30, i32 30, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_48]] [[Log1p_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLog1pINTEL [[Ty_49]] [[#]] [[Log1p_AId]] 30 30 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLog1pINTEL ; CHECK-LLVM: call i49 @intel_arbitrary_float_log1p.i49.i48(i48 %[[#]], i32 30, i32 30, i32 0, i32 2, i32 1) store i49 %6, i49* %2, align 8, !tbaa !73 %7 = bitcast i49* %2 to i8* @@ -1151,7 +1126,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_expILi16ELi25ELi16ELi %6 = call spir_func i42 @_Z30__spirv_ArbitraryFloatExpINTELILi42ELi42EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i42 %5, i32 25, i32 25, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_42]] [[Exp_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExpINTEL [[Ty_42]] [[#]] [[Exp_AId]] 25 25 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExpINTEL ; CHECK-LLVM: call i42 @intel_arbitrary_float_exp.i42.i42(i42 %[[#]], i32 25, i32 25, i32 0, i32 2, i32 1) store i42 %6, i42* %2, align 8, !tbaa !59 %7 = bitcast i42* %2 to i8* @@ -1173,7 +1147,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_exp2ILi1ELi1ELi2ELi2E %6 = call spir_func signext i5 
@_Z31__spirv_ArbitraryFloatExp2INTELILi3ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i3 signext %5, i32 1, i32 2, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_3]] [[Exp2_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExp2INTEL [[Ty_5]] [[#]] [[Exp2_AId]] 1 2 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExp2INTEL ; CHECK-LLVM: call i5 @intel_arbitrary_float_exp2.i5.i3(i3 %[[#]], i32 1, i32 2, i32 0, i32 2, i32 1) store i5 %6, i5* %2, align 1, !tbaa !41 %7 = bitcast i5* %2 to i8* @@ -1195,7 +1168,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_exp10ILi8ELi16ELi8ELi %6 = call spir_func signext i25 @_Z32__spirv_ArbitraryFloatExp10INTELILi25ELi25EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i25 signext %5, i32 16, i32 16, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_25]] [[Exp10_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExp10INTEL [[Ty_25]] [[#]] [[Exp10_AId]] 16 16 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExp10INTEL ; CHECK-LLVM: call i25 @intel_arbitrary_float_exp10.i25.i25(i25 %[[#]], i32 16, i32 16, i32 0, i32 2, i32 1) store i25 %6, i25* %2, align 4, !tbaa !13 %7 = bitcast i25* %2 to i8* @@ -1217,7 +1189,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_expm1ILi21ELi42ELi20E %6 = call spir_func i62 @_Z32__spirv_ArbitraryFloatExpm1INTELILi64ELi62EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i64 %5, i32 42, i32 41, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_64]] [[Expm1_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExpm1INTEL [[Ty_62]] [[#]] [[Expm1_AId]] 42 41 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExpm1INTEL ; CHECK-LLVM: call i62 @intel_arbitrary_float_expm1.i62.i64(i64 %[[#]], i32 42, i32 41, i32 0, i32 2, i32 1) store i62 %6, i62* %2, align 8, !tbaa !79 %7 = bitcast i62* %2 to i8* @@ -1239,7 +1210,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_sinILi14ELi15ELi16ELi %6 = call spir_func i34 
@_Z30__spirv_ArbitraryFloatSinINTELILi30ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i30 signext %5, i32 15, i32 17, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_30]] [[Sin_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinINTEL [[Ty_34]] [[#]] [[Sin_AId]] 15 17 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinINTEL ; CHECK-LLVM: call i34 @intel_arbitrary_float_sin.i34.i30(i30 %[[#]], i32 15, i32 17, i32 0, i32 2, i32 1) store i34 %6, i34* %2, align 8, !tbaa !53 %7 = bitcast i34* %2 to i8* @@ -1261,7 +1231,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_cosILi1ELi2ELi2ELi1EE %6 = call spir_func signext i4 @_Z30__spirv_ArbitraryFloatCosINTELILi4ELi4EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i4 signext %5, i32 2, i32 1, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_4]] [[Cos_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCosINTEL [[Ty_4]] [[#]] [[Cos_AId]] 2 1 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCosINTEL ; CHECK-LLVM: call i4 @intel_arbitrary_float_cos.i4.i4(i4 %[[#]], i32 2, i32 1, i32 0, i32 2, i32 1) store i4 %6, i4* %2, align 1, !tbaa !81 %7 = bitcast i4* %2 to i8* @@ -1283,7 +1252,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_sincosILi8ELi18ELi10E %6 = call spir_func i62 @_Z33__spirv_ArbitraryFloatSinCosINTELILi27ELi31EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEiiiiii(i27 signext %5, i32 18, i32 20, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_27]] [[SinCos_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinCosINTEL [[Ty_62]] [[#]] [[SinCos_AId]] 18 20 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinCosINTEL ; CHECK-LLVM: call i62 @intel_arbitrary_float_sincos.i62.i27(i27 %[[#]], i32 18, i32 20, i32 0, i32 2, i32 1) store i62 %6, i62* %2, align 8, !tbaa !79 %7 = bitcast i62* %2 to i8* @@ -1305,7 +1273,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_sinpiILi3ELi6ELi6ELi6 %6 = call spir_func signext i13 
@_Z32__spirv_ArbitraryFloatSinPiINTELILi10ELi13EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i10 signext %5, i32 6, i32 6, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_10]] [[SinPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinPiINTEL [[Ty_13]] [[#]] [[SinPi_AId]] 6 6 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinPiINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_sinpi.i13.i10(i10 %[[#]], i32 6, i32 6, i32 0, i32 2, i32 1) store i13 %6, i13* %2, align 2, !tbaa !19 %7 = bitcast i13* %2 to i8* @@ -1327,7 +1294,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_cospiILi18ELi40ELi18E %6 = call spir_func i59 @_Z32__spirv_ArbitraryFloatCosPiINTELILi59ELi59EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i59 %5, i32 40, i32 40, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_59]] [[CosPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCosPiINTEL [[Ty_59]] [[#]] [[CosPi_AId]] 40 40 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCosPiINTEL ; CHECK-LLVM: call i59 @intel_arbitrary_float_cospi.i59.i59(i59 %[[#]], i32 40, i32 40, i32 0, i32 2, i32 1) store i59 %6, i59* %2, align 8, !tbaa !85 %7 = bitcast i59* %2 to i8* @@ -1349,7 +1315,6 @@ define linkonce_odr dso_local spir_func void @_Z17ap_float_sincospiILi9ELi20ELi1 %6 = call spir_func i64 @_Z35__spirv_ArbitraryFloatSinCosPiINTELILi30ELi32EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEiiiiii(i30 signext %5, i32 20, i32 20, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_30]] [[SinCosPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinCosPiINTEL [[Ty_64]] [[#]] [[SinCosPi_AId]] 20 20 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinCosPiINTEL ; CHECK-LLVM: call i64 @intel_arbitrary_float_sincospi.i64.i30(i30 %[[#]], i32 20, i32 20, i32 0, i32 2, i32 1) store i64 %6, i64* %2, align 8, !tbaa !77 %7 = bitcast i64* %2 to i8* @@ -1371,7 +1336,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_asinILi2ELi4ELi2ELi8E %6 = call spir_func signext i11 
@_Z31__spirv_ArbitraryFloatASinINTELILi7ELi11EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i7 signext %5, i32 4, i32 8, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_7]] [[ASin_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatASinINTEL [[Ty_11]] [[#]] [[ASin_AId]] 4 8 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatASinINTEL ; CHECK-LLVM: call i11 @intel_arbitrary_float_asin.i11.i7(i7 %[[#]], i32 4, i32 8, i32 0, i32 2, i32 1) store i11 %6, i11* %2, align 2, !tbaa !27 %7 = bitcast i11* %2 to i8* @@ -1393,7 +1357,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_asinpiILi11ELi23ELi11 %6 = call spir_func i35 @_Z33__spirv_ArbitraryFloatASinPiINTELILi35ELi35EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i35 %5, i32 23, i32 23, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_35]] [[ASinPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatASinPiINTEL [[Ty_35]] [[#]] [[ASinPi_AId]] 23 23 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatASinPiINTEL ; CHECK-LLVM: call i35 @intel_arbitrary_float_asinpi.i35.i35(i35 %[[#]], i32 23, i32 23, i32 0, i32 2, i32 1) store i35 %6, i35* %2, align 8, !tbaa !87 %7 = bitcast i35* %2 to i8* @@ -1415,7 +1378,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_acosILi4ELi9ELi3ELi10 %6 = call spir_func signext i14 @_Z31__spirv_ArbitraryFloatACosINTELILi14ELi14EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i14 signext %5, i32 9, i32 10, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_14]] [[ACos_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatACosINTEL [[Ty_14]] [[#]] [[ACos_AId]] 9 10 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatACosINTEL ; CHECK-LLVM: call i14 @intel_arbitrary_float_acos.i14.i14(i14 %[[#]], i32 9, i32 10, i32 0, i32 2, i32 1) store i14 %6, i14* %2, align 2, !tbaa !23 %7 = bitcast i14* %2 to i8* @@ -1435,7 +1397,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_acospiILi2ELi5ELi3ELi %4 = call spir_func signext i8 
@_Z33__spirv_ArbitraryFloatACosPiINTELILi8ELi8EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i8 signext %3, i32 5, i32 4, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_8]] [[ACosPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatACosPiINTEL [[Ty_8]] [[#]] [[ACosPi_AId]] 5 4 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatACosPiINTEL ; CHECK-LLVM: call i8 @intel_arbitrary_float_acospi.i8.i8(i8 %[[#]], i32 5, i32 4, i32 0, i32 2, i32 1) store i8 %4, i8* %2, align 1, !tbaa !67 call void @llvm.lifetime.end.p0i8(i64 1, i8* %2) #5 @@ -1455,7 +1416,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_atanILi12ELi31ELi12EL %6 = call spir_func i44 @_Z31__spirv_ArbitraryFloatATanINTELILi44ELi44EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i44 %5, i32 31, i32 31, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_44]] [[ATan_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatATanINTEL [[Ty_44]] [[#]] [[ATan_AId]] 31 31 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatATanINTEL ; CHECK-LLVM: call i44 @intel_arbitrary_float_atan.i44.i44(i44 %[[#]], i32 31, i32 31, i32 0, i32 2, i32 1) store i44 %6, i44* %2, align 8, !tbaa !89 %7 = bitcast i44* %2 to i8* @@ -1477,7 +1437,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_atanpiILi1ELi38ELi1EL %6 = call spir_func i34 @_Z33__spirv_ArbitraryFloatATanPiINTELILi40ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i40 %5, i32 38, i32 32, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_40]] [[ATanPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatATanPiINTEL [[Ty_34]] [[#]] [[ATanPi_AId]] 38 32 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatATanPiINTEL ; CHECK-LLVM: call i34 @intel_arbitrary_float_atanpi.i34.i40(i40 %[[#]], i32 38, i32 32, i32 0, i32 2, i32 1) store i34 %6, i34* %2, align 8, !tbaa !53 %7 = bitcast i34* %2 to i8* @@ -1504,7 +1463,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_atan2ILi7ELi16ELi7ELi ; CHECK-SPIRV: 6 Load [[Ty_24]] [[ATan2_AId:[0-9]+]] ; 
CHECK-SPIRV-NEXT: 6 Load [[Ty_25]] [[ATan2_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatATan2INTEL [[Ty_27]] [[#]] [[ATan2_AId]] 16 [[ATan2_BId]] 17 18 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatATan2INTEL ; CHECK-LLVM: call i27 @intel_arbitrary_float_atan2.i27.i24.i25(i24 %[[#]], i32 16, i25 %[[#]], i32 17, i32 18, i32 0, i32 2, i32 1) store i27 %9, i27* %3, align 4, !tbaa !83 %10 = bitcast i27* %3 to i8* @@ -1533,7 +1491,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_powILi8ELi8ELi9ELi9EL ; CHECK-SPIRV: 6 Load [[Ty_17]] [[Pow_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_19]] [[Pow_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatPowINTEL [[Ty_21]] [[#]] [[Pow_AId]] 8 [[Pow_BId]] 9 10 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatPowINTEL ; CHECK-LLVM: call i21 @intel_arbitrary_float_pow.i21.i17.i19(i17 %[[#]], i32 8, i19 %[[#]], i32 9, i32 10, i32 0, i32 2, i32 1) store i21 %9, i21* %3, align 4, !tbaa !95 %10 = bitcast i21* %3 to i8* @@ -1562,7 +1519,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_powrILi18ELi35ELi19EL ; CHECK-SPIRV: 6 Load [[Ty_54]] [[PowR_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_55]] [[PowR_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatPowRINTEL [[Ty_56]] [[#]] [[PowR_AId]] 35 [[PowR_BId]] 35 35 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatPowRINTEL ; CHECK-LLVM: call i56 @intel_arbitrary_float_powr.i56.i54.i55(i54 %[[#]], i32 35, i55 %[[#]], i32 35, i32 35, i32 0, i32 2, i32 1) store i56 %9, i56* %3, align 8, !tbaa !99 %10 = bitcast i56* %3 to i8* @@ -1591,7 +1547,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_pownILi4ELi7ELi10ELi5 ; CHECK-SPIRV: 6 Load [[Ty_12]] [[PowN_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_10]] [[PowN_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 10 ArbitraryFloatPowNINTEL [[Ty_15]] [[#]] [[PowN_AId]] 7 [[PowN_BId]] 9 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatPowNINTEL ; CHECK-LLVM: call i15 
@intel_arbitrary_float_pown.i15.i12.i10(i12 %[[#]], i32 7, i10 %[[#]], i32 9, i32 0, i32 2, i32 1) store i15 %9, i15* %3, align 2, !tbaa !21 %10 = bitcast i15* %3 to i8* diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 73c1e96d3d9ad..5f07acc1f6922 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -8,9 +8,8 @@ function(tablegen project ofn) message(FATAL_ERROR "${project}_TABLEGEN_EXE not set") endif() - # Use depfile instead of globbing arbitrary *.td(s) - # DEPFILE is available for Ninja Generator with CMake>=3.7. - if(CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.7) + # Use depfile instead of globbing arbitrary *.td(s) for Ninja. + if(CMAKE_GENERATOR STREQUAL "Ninja") # Make output path relative to build.ninja, assuming located on # ${CMAKE_BINARY_DIR}. # CMake emits build targets as relative paths but Ninja doesn't identify @@ -80,14 +79,6 @@ function(tablegen project ofn) set(tblgen_change_flag "--write-if-changed") endif() - # With CMake 3.12 this can be reduced to: - # get_directory_property(tblgen_includes "INCLUDE_DIRECTORIES") - # list(TRANSFORM tblgen_includes PREPEND -I) - set(tblgen_includes) - get_directory_property(includes "INCLUDE_DIRECTORIES") - foreach(include ${includes}) - list(APPEND tblgen_includes -I ${include}) - endforeach() # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list # (both the target and the file) to have .inc files rebuilt on # a tablegen change, as cmake does not propagate file-level dependencies @@ -97,6 +88,9 @@ function(tablegen project ofn) # dependency twice in the result file when # ("${${project}_TABLEGEN_TARGET}" STREQUAL "${${project}_TABLEGEN_EXE}") # but lets us having smaller and cleaner code here. 
+ get_directory_property(tblgen_includes INCLUDE_DIRECTORIES) + list(TRANSFORM tblgen_includes PREPEND -I) + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} ${tblgen_includes} @@ -139,8 +133,8 @@ macro(add_tablegen target project) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) - # CMake-3.9 doesn't let compilation units depend on their dependent libraries. - if(NOT (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.9) AND NOT XCODE) + # CMake doesn't let compilation units depend on their dependent libraries on some generators. + if(NOT CMAKE_GENERATOR STREQUAL "Ninja" AND NOT XCODE) # FIXME: It leaks to user, callee of add_tablegen. set(LLVM_ENABLE_OBJLIB ON) endif() diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ce791b3aa9d48..781b2385de500 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -6885,7 +6885,7 @@ where the first ``param`` is the number of the parameter it describes, which can be accessed by the function. This range does not include accesses by function calls from ``calls`` list. -where each ``Callee`` decribes how parameter is forwared into other +where each ``Callee`` describes how parameter is forwarded into other functions and looks like: .. code-block:: text @@ -12479,11 +12479,11 @@ very cleanly specified and it is unwise to depend on it. Semantics: """""""""" -The '``llvm.memcpy.*``' intrinsics copy a block of memory from the -source location to the destination location, which are not allowed to -overlap. It copies "len" bytes of memory over. If the argument is known -to be aligned to some boundary, this can be specified as an attribute on -the argument. +The '``llvm.memcpy.*``' intrinsics copy a block of memory from the source +location to the destination location, which must either be equal or +non-overlapping. 
It copies "len" bytes of memory over. If the argument is known +to be aligned to some boundary, this can be specified as an attribute on the +argument. If "len" is 0, the pointers may be NULL, dangling, ``undef``, or ``poison`` pointers. However, they must still be appropriately aligned. @@ -15952,8 +15952,8 @@ Arguments: """""""""" The first argument ``%Ptr`` is a pointer type to the returned vector type, and -correponds to the start address to load from. The second argument ``%Stride`` -is a postive, constant integer with ``%Stride >= ``. ``%Stride`` is used +corresponds to the start address to load from. The second argument ``%Stride`` +is a positive, constant integer with ``%Stride >= ``. ``%Stride`` is used to compute the column memory addresses. I.e., for a column ``C``, its start memory addresses is calculated with ``%Ptr + C * %Stride``. The third Argument ```` is a boolean value. The fourth and fifth arguments, diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 2992091d69699..f7daf30bddcfa 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -24,10 +24,10 @@ Security Group Members The members of the group represent a wide cross-section of the community, and meet the criteria for inclusion below. 
+* Ahmed Bougacha (Apple) * Akila Srinivasan (Apple) * Dimitry Andric (individual; FreeBSD) * Ed Maste (individual; FreeBSD) -* JF Bastien (Apple) * Josh Eads (Sony) * Kristof Beyls (ARM) * Matthew Riley (Google) diff --git a/llvm/include/llvm-c/DataTypes.h b/llvm/include/llvm-c/DataTypes.h index 0f27ba81865e0..4eb0ac97d97e5 100644 --- a/llvm/include/llvm-c/DataTypes.h +++ b/llvm/include/llvm-c/DataTypes.h @@ -77,8 +77,4 @@ typedef signed int ssize_t; # define UINT64_MAX 0xffffffffffffffffULL #endif -#ifndef HUGE_VALF -#define HUGE_VALF (float)HUGE_VAL -#endif - #endif /* LLVM_C_DATATYPES_H */ diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index e3032a19f111f..5e4206732f4df 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1450,6 +1450,14 @@ class LLVM_NODISCARD APInt { setBit(BitWidth - 1); } + /// Set a given bit to a given value. + void setBitVal(unsigned BitPosition, bool BitValue) { + if (BitValue) + setBit(BitPosition); + else + clearBit(BitPosition); + } + /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls /// setBits when \p loBit < \p hiBit. diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h index f8c8fec0ec9e7..0a7dcfe226315 100644 --- a/llvm/include/llvm/ADT/CoalescingBitVector.h +++ b/llvm/include/llvm/ADT/CoalescingBitVector.h @@ -34,15 +34,14 @@ namespace llvm { /// performance for non-sequential find() operations. /// /// \tparam IndexT - The type of the index into the bitvector. -/// \tparam N - The first N coalesced intervals of set bits are stored in-place. -template class CoalescingBitVector { +template class CoalescingBitVector { static_assert(std::is_unsigned::value, "Index must be an unsigned integer."); - using ThisT = CoalescingBitVector; + using ThisT = CoalescingBitVector; /// An interval map for closed integer ranges. 
The mapped values are unused. - using MapT = IntervalMap; + using MapT = IntervalMap; using UnderlyingIterator = typename MapT::const_iterator; diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 3ccee3d21d48b..c3c6a366dab2d 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -60,7 +60,14 @@ template class SmallVectorBase { /// This is an implementation of the grow() method which only works /// on POD-like data types and is out of line to reduce code duplication. /// This function will report a fatal error if it cannot increase capacity. - void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize); + void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); + + /// Report that MinSize doesn't fit into this vector's size type. Throws + /// std::length_error or calls report_fatal_error. + LLVM_ATTRIBUTE_NORETURN static void report_size_overflow(size_t MinSize); + /// Report that this vector is already at maximum capacity. Throws + /// std::length_error or calls report_fatal_error. + LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(); public: size_t size() const { return Size; } @@ -115,8 +122,8 @@ class SmallVectorTemplateCommon protected: SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} - void grow_pod(size_t MinCapacity, size_t TSize) { - Base::grow_pod(getFirstEl(), MinCapacity, TSize); + void grow_pod(size_t MinSize, size_t TSize) { + Base::grow_pod(getFirstEl(), MinSize, TSize); } /// Return true if this is a smallvector which has not had dynamic @@ -269,14 +276,14 @@ void SmallVectorTemplateBase::grow(size_t MinSize) { // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. 
if (MinSize > this->SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity overflow during allocation"); + this->report_size_overflow(MinSize); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a - // default MinCapacity of 0, but the current capacity cannot be increased. + // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. if (this->capacity() == this->SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity unable to grow"); + this->report_at_maximum_capacity(); // Always grow, even from zero. size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h index 0fea81e215c91..6eb637e727821 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -19,30 +19,30 @@ #ifndef LLVM_ANALYSIS_LINT_H #define LLVM_ANALYSIS_LINT_H +#include "llvm/IR/PassManager.h" + namespace llvm { class FunctionPass; class Module; class Function; -/// Create a lint pass. -/// -/// Check a module or function. -FunctionPass *createLintPass(); +FunctionPass *createLintLegacyPassPass(); -/// Check a module. +/// Lint a module. /// /// This should only be used for debugging, because it plays games with /// PassManagers and stuff. -void lintModule( - const Module &M ///< The module to be checked -); +void lintModule(const Module &M); + +// Lint a function. +void lintFunction(const Function &F); -// lintFunction - Check a function. 
-void lintFunction( - const Function &F ///< The function to be checked -); +class LintPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; -} // End llvm namespace +} // namespace llvm -#endif +#endif // LLVM_ANALYSIS_LINT_H diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 2f64b0fedc7aa..9fdbf638078f4 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -245,6 +245,30 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4) TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8) TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16) +TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16) + TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2) TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4) TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 074960e7ced20..8498335bf78e6 100644 --- 
a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -358,7 +358,7 @@ int getSplatIndex(ArrayRef Mask); /// Get splat value if the input is a splat vector or return nullptr. /// The value may be extracted from a splat constants vector or from /// a sequence of instructions that broadcast a single value into a vector. -const Value *getSplatValue(const Value *V); +Value *getSplatValue(const Value *V); /// Return true if each element of the vector value \p V is poisoned or equal to /// every other non-poisoned element. If an index element is specified, either diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index a1eb4d0383fb2..8fa5646879c83 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -312,6 +312,7 @@ enum { EM_LANAI = 244, // Lanai 32-bit processor EM_BPF = 247, // Linux kernel bpf virtual machine EM_VE = 251, // NEC SX-Aurora VE + EM_CSKY = 252, // C-SKY 32-bit processor }; // Object file classes. @@ -772,6 +773,12 @@ enum { #include "ELFRelocs/VE.def" }; + +// ELF Relocation types for CSKY +enum { +#include "ELFRelocs/CSKY.def" +}; + #undef ELF_RELOC // Section header. 
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def new file mode 100644 index 0000000000000..c5f2dbae8033c --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def @@ -0,0 +1,74 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_CKCORE_NONE, 0) +ELF_RELOC(R_CKCORE_ADDR32, 1) +ELF_RELOC(R_CKCORE_PCREL_IMM8_4, 2) +ELF_RELOC(R_CKCORE_PCREL_IMM11_2, 3) +ELF_RELOC(R_CKCORE_PCREL_IMM4_2, 4) +ELF_RELOC(R_CKCORE_PCREL32, 5) +ELF_RELOC(R_CKCORE_PCREL_JSR_IMM11_2, 6) +ELF_RELOC(R_CKCORE_GNU_VTINHERIT, 7) +ELF_RELOC(R_CKCORE_GNU_VTENTRY, 8) +ELF_RELOC(R_CKCORE_RELATIVE, 9) +ELF_RELOC(R_CKCORE_COPY, 10) +ELF_RELOC(R_CKCORE_GLOB_DAT, 11) +ELF_RELOC(R_CKCORE_JUMP_SLOT, 12) +ELF_RELOC(R_CKCORE_GOTOFF, 13) +ELF_RELOC(R_CKCORE_GOTPC, 14) +ELF_RELOC(R_CKCORE_GOT32, 15) +ELF_RELOC(R_CKCORE_PLT32, 16) +ELF_RELOC(R_CKCORE_ADDRGOT, 17) +ELF_RELOC(R_CKCORE_ADDRPLT, 18) +ELF_RELOC(R_CKCORE_PCREL_IMM26_2, 19) +ELF_RELOC(R_CKCORE_PCREL_IMM16_2, 20) +ELF_RELOC(R_CKCORE_PCREL_IMM16_4, 21) +ELF_RELOC(R_CKCORE_PCREL_IMM10_2, 22) +ELF_RELOC(R_CKCORE_PCREL_IMM10_4, 23) +ELF_RELOC(R_CKCORE_ADDR_HI16, 24) +ELF_RELOC(R_CKCORE_ADDR_LO16, 25) +ELF_RELOC(R_CKCORE_GOTPC_HI16, 26) +ELF_RELOC(R_CKCORE_GOTPC_LO16, 27) +ELF_RELOC(R_CKCORE_GOTOFF_HI16, 28) +ELF_RELOC(R_CKCORE_GOTOFF_LO16, 29) +ELF_RELOC(R_CKCORE_GOT12, 30) +ELF_RELOC(R_CKCORE_GOT_HI16, 31) +ELF_RELOC(R_CKCORE_GOT_LO16, 32) +ELF_RELOC(R_CKCORE_PLT12, 33) +ELF_RELOC(R_CKCORE_PLT_HI16, 34) +ELF_RELOC(R_CKCORE_PLT_LO16, 35) +ELF_RELOC(R_CKCORE_ADDRGOT_HI16, 36) +ELF_RELOC(R_CKCORE_ADDRGOT_LO16, 37) +ELF_RELOC(R_CKCORE_ADDRPLT_HI16, 38) +ELF_RELOC(R_CKCORE_ADDRPLT_LO16, 39) +ELF_RELOC(R_CKCORE_PCREL_JSR_IMM26_2, 40) +ELF_RELOC(R_CKCORE_TOFFSET_LO16, 41) +ELF_RELOC(R_CKCORE_DOFFSET_LO16, 42) +ELF_RELOC(R_CKCORE_PCREL_IMM18_2, 43) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18, 44) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18_2, 45) 
+ELF_RELOC(R_CKCORE_DOFFSET_IMM18_4, 46) +ELF_RELOC(R_CKCORE_GOTOFF_IMM18, 47) +ELF_RELOC(R_CKCORE_GOT_IMM18_4, 48) +ELF_RELOC(R_CKCORE_PLT_IMM18_4, 49) +ELF_RELOC(R_CKCORE_PCREL_IMM7_4, 50) +ELF_RELOC(R_CKCORE_TLS_LE32, 51) +ELF_RELOC(R_CKCORE_TLS_IE32, 52) +ELF_RELOC(R_CKCORE_TLS_GD32, 53) +ELF_RELOC(R_CKCORE_TLS_LDM32, 54) +ELF_RELOC(R_CKCORE_TLS_LDO32, 55) +ELF_RELOC(R_CKCORE_TLS_DTPMOD32, 56) +ELF_RELOC(R_CKCORE_TLS_DTPOFF32, 57) +ELF_RELOC(R_CKCORE_TLS_TPOFF32, 58) +ELF_RELOC(R_CKCORE_PCREL_FLRW_IMM8_4, 59) +ELF_RELOC(R_CKCORE_NOJSRI, 60) +ELF_RELOC(R_CKCORE_CALLGRAPH, 61) +ELF_RELOC(R_CKCORE_IRELATIVE, 62) +ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM4_4, 63) +ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM12_4, 64) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_1, 65) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_2, 66) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_4, 67) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_8, 68) diff --git a/llvm/include/llvm/CodeGen/AntiDepBreaker.h b/llvm/include/llvm/CodeGen/AntiDepBreaker.h index d75c13e2dd756..0553d7d452a46 100644 --- a/llvm/include/llvm/CodeGen/AntiDepBreaker.h +++ b/llvm/include/llvm/CodeGen/AntiDepBreaker.h @@ -17,7 +17,6 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 8ebe788ac360a..52c88d8af45b1 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" @@ -25,6 +25,7 @@ namespace llvm { class CCState; +class 
MachineFunction; class MVT; class TargetRegisterInfo; @@ -432,10 +433,7 @@ class CCState { return AllocateStack(Size, Align(Alignment)); } - void ensureMaxAlignment(Align Alignment) { - if (!AnalyzingMustTailForwardedRegs) - MF.getFrameInfo().ensureMaxAlignment(Alignment); - } + void ensureMaxAlignment(Align Alignment); /// Version of AllocateStack with extra register to be shadowed. LLVM_ATTRIBUTE_DEPRECATED(unsigned AllocateStack(unsigned Size, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index ef93042f6690d..dbd7e00c429aa 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -17,8 +17,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include @@ -30,11 +32,9 @@ class CallBase; class DataLayout; class Function; class MachineIRBuilder; -class MachineOperand; struct MachinePointerInfo; class MachineRegisterInfo; class TargetLowering; -class Type; class Value; class CallLowering { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index e1f0535affcdb..8607ad02d5063 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -356,8 +356,8 @@ class CombinerHelper { bool matchRedundantSExtInReg(MachineInstr &MI); /// Combine inverting a result of a compare into the opposite cond code. 
- bool matchNotCmp(MachineInstr &MI, Register &CmpReg); - bool applyNotCmp(MachineInstr &MI, Register &CmpReg); + bool matchNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); + bool applyNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 033d5b4b58348..38eb0e4bebe74 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -20,9 +20,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" -#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Allocator.h" @@ -37,7 +38,6 @@ class CallInst; class CallLowering; class Constant; class DataLayout; -class FunctionLoweringInfo; class Instruction; class MachineBasicBlock; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index b7d2489eda23c..50534860bec16 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -16,10 +16,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MachineValueType.h" +#include namespace llvm { @@ -34,10 +33,10 @@ class MachineRegisterInfo; class MCInstrDesc; class RegisterBankInfo; class TargetInstrInfo; +class TargetLowering; class TargetPassConfig; class TargetRegisterInfo; class TargetRegisterClass; 
-class Twine; class ConstantFP; class APFloat; diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index e57c32c5ae614..9cb92091db50b 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -14,11 +14,15 @@ #ifndef LLVM_CODEGEN_MIRFORMATTER_H #define LLVM_CODEGEN_MIRFORMATTER_H -#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/raw_ostream.h" +#include namespace llvm { +class MachineFunction; +class MachineInstr; struct PerFunctionMIParsingState; struct SlotMapping; diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h index 0f252137364cf..b7e89cf4b133f 100644 --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -728,12 +728,12 @@ class MachineOperand { /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. - void ChangeToImmediate(int64_t ImmVal); + void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags = 0); /// ChangeToFPImmediate - Replace this operand with a new FP immediate operand /// of the specified value. If an operand is known to be an FP immediate /// already, the setFPImm method should be used. - void ChangeToFPImmediate(const ConstantFP *FPImm); + void ChangeToFPImmediate(const ConstantFP *FPImm, unsigned TargetFlags = 0); /// ChangeToES - Replace this operand with a new external symbol operand. void ChangeToES(const char *SymName, unsigned TargetFlags = 0); @@ -743,10 +743,10 @@ class MachineOperand { unsigned TargetFlags = 0); /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. 
- void ChangeToMCSymbol(MCSymbol *Sym); + void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags = 0); /// Replace this operand with a frame index. - void ChangeToFrameIndex(int Idx); + void ChangeToFrameIndex(int Idx, unsigned TargetFlags = 0); /// Replace this operand with a target index. void ChangeToTargetIndex(unsigned Idx, int64_t Offset, @@ -759,6 +759,11 @@ class MachineOperand { bool isKill = false, bool isDead = false, bool isUndef = false, bool isDebug = false); + /// getTargetIndexName - If this MachineOperand is a TargetIndex that has a + /// name, attempt to get the name. Returns nullptr if the TargetIndex does not + /// have a name. Asserts if MO is not a TargetIndex. + const char *getTargetIndexName() const; + //===--------------------------------------------------------------------===// // Construction methods. //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h new file mode 100644 index 0000000000000..a5e85aef099dc --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -0,0 +1,30 @@ +//===------------ MachineStableHash.h - MIR Stable Hashing Utilities ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stable hashing for MachineInstr and MachineOperand. Useful for getting a +// hash across runs, modules, etc.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H +#define LLVM_CODEGEN_MACHINESTABLEHASH_H + +#include "llvm/CodeGen/StableHashing.h" + +namespace llvm { +class MachineInstr; +class MachineOperand; + +stable_hash stableHashValue(const MachineOperand &MO); +stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false, + bool HashConstantPoolIndices = false, + bool HashMemOperands = false); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index c285a68da9b03..5607e785e349a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -27,7 +27,6 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/DAGCombine.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -64,6 +63,7 @@ class ConstantFP; class ConstantInt; class DataLayout; struct fltSemantics; +class FunctionLoweringInfo; class GlobalValue; struct KnownBits; class LegacyDivergenceAnalysis; diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index cde075f41f739..6eef79162f8a7 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,9 +357,8 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. - // Flag bits can only be masked out during intersection if the masking flags - // are defined. + // This bit is used to determine if the flags are in a defined state. It is + // only used by SelectionDAGBuilder. 
bool AnyDefined : 1; bool NoUnsignedWrap : 1; @@ -464,11 +463,9 @@ struct SDNodeFlags { bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } - /// Clear any flags in this flag set that aren't also set in Flags. - /// If the given Flags are undefined then don't do anything. + /// Clear any flags in this flag set that aren't also set in Flags. All + /// flags will be cleared if Flags are undefined. void intersectWith(const SDNodeFlags Flags) { - if (!Flags.isDefined()) - return; NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h new file mode 100644 index 0000000000000..c6113aa93c800 --- /dev/null +++ b/llvm/include/llvm/CodeGen/StableHashing.h @@ -0,0 +1,112 @@ +//===- llvm/CodeGen/StableHashing.h - Utilities for stable hashing * C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides types and functions for computing and combining stable +// hashes. Stable hashes can be useful for hashing across different modules, +// processes, or compiler runs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_STABLEHASHING_H +#define LLVM_CODEGEN_STABLEHASHING_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +/// An opaque object representing a stable hash code. It can be serialized, +/// deserialized, and is stable across processes and executions. 
+using stable_hash = uint64_t; + +// Implementation details +namespace hashing { +namespace detail { + +// Stable hashes are based on the 64-bit FNV-1 hash: +// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function + +const uint64_t FNV_PRIME_64 = 1099511628211u; +const uint64_t FNV_OFFSET_64 = 14695981039346656037u; + +inline void stable_hash_append(stable_hash &Hash, const char Value) { + Hash = Hash ^ (Value & 0xFF); + Hash = Hash * FNV_PRIME_64; +} + +inline void stable_hash_append(stable_hash &Hash, stable_hash Value) { + for (unsigned I = 0; I < 8; ++I) { + stable_hash_append(Hash, (const char)Value); + Value >>= 8; + } +} + +} // namespace detail +} // namespace hashing + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + return Hash; +} + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, + stable_hash C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + hashing::detail::stable_hash_append(Hash, C); + return Hash; +} + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, + stable_hash C, stable_hash D) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + hashing::detail::stable_hash_append(Hash, C); + hashing::detail::stable_hash_append(Hash, D); + return Hash; +} + +/// Compute a stable_hash for a sequence of values. +/// +/// This hashes a sequence of values. It produces the same stable_hash as +/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized +/// sequences and is significantly faster given pointers and types which +/// can be hashed as a sequence of bytes. 
+template +stable_hash stable_hash_combine_range(InputIteratorT First, + InputIteratorT Last) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + for (auto I = First; I != Last; ++I) + hashing::detail::stable_hash_append(Hash, *I); + return Hash; +} + +inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + for (size_t I = 0; I < C; ++I) + hashing::detail::stable_hash_append(Hash, P[I]); + return Hash; +} + +inline stable_hash stable_hash_combine_string(const StringRef &S) { + return stable_hash_combine_range(S.begin(), S.end()); +} + +inline stable_hash stable_hash_combine_string(const char *C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + while (*C) + hashing::detail::stable_hash_append(Hash, *(C++)); + return Hash; +} + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h b/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h index d35c8abc84a2e..7c1b72befde76 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h @@ -14,6 +14,7 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H #define LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" namespace llvm { @@ -21,6 +22,8 @@ namespace orc { class TPCDynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { public: + using SymbolPredicate = unique_function; + /// Create a DynamicLibrarySearchGenerator that searches for symbols in the /// library with the given handle. /// @@ -28,19 +31,22 @@ class TPCDynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { /// will be searched for. If the predicate is not given then all symbols will /// be searched for. 
TPCDynamicLibrarySearchGenerator(TargetProcessControl &TPC, - TargetProcessControl::DylibHandle H) - : TPC(TPC), H(H) {} + TargetProcessControl::DylibHandle H, + SymbolPredicate Allow = SymbolPredicate()) + : TPC(TPC), H(H), Allow(std::move(Allow)) {} /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. static Expected> - Load(TargetProcessControl &TPC, const char *LibraryPath); + Load(TargetProcessControl &TPC, const char *LibraryPath, + SymbolPredicate Allow = SymbolPredicate()); /// Creates a TPCDynamicLibrarySearchGenerator that searches for symbols in /// the target process. static Expected> - GetForTargetProcess(TargetProcessControl &TPC) { + GetForTargetProcess(TargetProcessControl &TPC, + SymbolPredicate Allow = SymbolPredicate()) { return Load(TPC, nullptr); } @@ -51,6 +57,7 @@ class TPCDynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { private: TargetProcessControl &TPC; TargetProcessControl::DylibHandle H; + SymbolPredicate Allow; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h index 159b6e8d56df3..d3349753284e2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h @@ -149,8 +149,11 @@ class TargetProcessControl { virtual Expected loadDylib(const char *DylibPath) = 0; /// Search for symbols in the target process. + /// /// The result of the lookup is a 2-dimentional array of target addresses - /// that correspond to the lookup order. + /// that correspond to the lookup order. If a required symbol is not + /// found then this method will return an error. If a weakly referenced + /// symbol is not found then it be assigned a '0' value in the result. 
virtual Expected lookupSymbols(LookupRequest Request) = 0; protected: diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index c29d20c1729ba..9ba9ea68f9898 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -70,13 +70,12 @@ class IntrinsicInst : public CallInst { case Intrinsic::uadd_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: - // TODO: These fixed-point math intrinsics have commutative first two - // operands, but callers may not handle instructions with more than - // two operands. - // case Intrinsic::smul_fix: - // case Intrinsic::umul_fix: - // case Intrinsic::smul_fix_sat: - // case Intrinsic::umul_fix_sat: + case Intrinsic::smul_fix: + case Intrinsic::umul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix_sat: + case Intrinsic::fma: + case Intrinsic::fmuladd: return true; default: return false; diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 1ef44b735c9fc..73a49ec77f8b4 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -193,6 +193,13 @@ class PowerPC_Vec_QQQ_Intrinsic [llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; +/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64 +/// vectors and returns one v1i128. These intrinsics have no side effects. +class PowerPC_Vec_QDD_Intrinsic + : PowerPC_Vec_Intrinsic; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. // @@ -448,6 +455,18 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">, Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>; + // P10 Vector Expand with Mask + def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">, + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>; + // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins. def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], @@ -673,6 +692,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>; // Vector Multiply Instructions. def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">, @@ -684,6 +706,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">; def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -693,6 +716,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">; def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], @@ -703,6 +727,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">; def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -712,6 +737,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">; // Vector Sum Instructions. 
def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">, diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 18ccb8f88cf1b..5d11a25c7a9ad 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -219,7 +219,7 @@ void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); -void initializeLintPass(PassRegistry&); +void initializeLintLegacyPassPass(PassRegistry &); void initializeLiveDebugValuesPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 71af80da5b804..41175ee4c56f5 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -210,7 +210,7 @@ namespace { (void) llvm::createPrintFunctionPass(os); (void) llvm::createModuleDebugInfoPrinterPass(); (void) llvm::createPartialInliningPass(); - (void) llvm::createLintPass(); + (void) llvm::createLintLegacyPassPass(); (void) llvm::createSinkingPass(); (void) llvm::createLowerAtomicPass(); (void) llvm::createCorrelatedValuePropagationPass(); diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 8049020f564b6..74d4745c1034f 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1112,6 +1112,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf32-powerpc"; case ELF::EM_RISCV: return "elf32-littleriscv"; + case ELF::EM_CSKY: + return "elf32-csky"; case ELF::EM_SPARC: case ELF::EM_SPARC32PLUS: return "elf32-sparc"; @@ -1224,6 +1226,8 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_VE: return Triple::ve; + case ELF::EM_CSKY: + return 
Triple::csky; default: return Triple::UnknownArch; } diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 19b7f3500ee67..99a7af87d2c78 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -215,7 +215,7 @@ struct Data { Optional> DebugStrOffsets; Optional> DebugAranges; std::vector DebugRanges; - std::vector DebugAddr; + Optional> DebugAddr; Optional PubNames; Optional PubTypes; diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h index 7cbc668b3a0e8..0ec0242d569d3 100644 --- a/llvm/include/llvm/Support/ErrorHandling.h +++ b/llvm/include/llvm/Support/ErrorHandling.h @@ -110,9 +110,9 @@ void install_out_of_memory_new_handler(); /// the following unwind succeeds, e.g. do not trigger additional allocations /// in the unwind chain. /// -/// If no error handler is installed (default), then a bad_alloc exception -/// is thrown, if LLVM is compiled with exception support, otherwise an -/// assertion is called. +/// If no error handler is installed (default), throws a bad_alloc exception +/// if LLVM is compiled with exception support. Otherwise prints the error +/// to standard error and calls abort(). LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true); diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 5b3de63cd359a..a29e150b904a3 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -173,6 +173,10 @@ struct KnownBits { One.extractBits(NumBits, BitPosition)); } + /// Return KnownBits based on this, but updated given that the underlying + /// value is known to be greater than or equal to Val. + KnownBits makeGE(const APInt &Val) const; + /// Returns the minimum number of trailing zero bits. 
unsigned countMinTrailingZeros() const { return Zero.countTrailingOnes(); @@ -241,6 +245,18 @@ struct KnownBits { static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS); + /// Compute known bits for umax(LHS, RHS). + static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for umin(LHS, RHS). + static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for smax(LHS, RHS). + static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for smin(LHS, RHS). + static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS); + /// Insert the bits from a smaller known bits starting at bitPosition. void insertBits(const KnownBits &SubBits, unsigned BitPosition) { Zero.insertBits(SubBits.Zero, BitPosition); diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index b6392e61db4b3..a0b8fbde25a96 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -116,6 +116,15 @@ class ElementCount { unsigned getKnownMinValue() const { return Min; } + // Return the minimum value with the assumption that the count is exact. + // Use in places where a scalable count doesn't make sense (e.g. non-vector + // types, or vectors in backends which don't support scalable vectors). 
+ unsigned getFixedValue() const { + assert(!Scalable && + "Request for a fixed element count on a scalable object"); + return Min; + } + bool isScalable() const { return Scalable; } }; diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index af09c21085c5e..055c0e5dd86f3 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -749,6 +749,10 @@ class RedirectingFileSystem : public vfs::FileSystem { StringRef getExternalContentsPrefixDir() const; + void setFallthrough(bool Fallthrough); + + std::vector getRoots() const; + void dump(raw_ostream &OS) const; void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/include/llvm/Support/Win64EH.h b/llvm/include/llvm/Support/Win64EH.h index 8220131e5be92..9359fcb4286a9 100644 --- a/llvm/include/llvm/Support/Win64EH.h +++ b/llvm/include/llvm/Support/Win64EH.h @@ -38,12 +38,14 @@ enum UnwindOpcodes { // The following set of unwind opcodes is for ARM64. 
They are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling UOP_AllocMedium, + UOP_SaveR19R20X, UOP_SaveFPLRX, UOP_SaveFPLR, UOP_SaveReg, UOP_SaveRegX, UOP_SaveRegP, UOP_SaveRegPX, + UOP_SaveLRPair, UOP_SaveFReg, UOP_SaveFRegX, UOP_SaveFRegP, @@ -51,7 +53,11 @@ enum UnwindOpcodes { UOP_SetFP, UOP_AddFP, UOP_Nop, - UOP_End + UOP_End, + UOP_SaveNext, + UOP_TrapFrame, + UOP_Context, + UOP_ClearUnwoundToCall }; /// UnwindCode - This union describes a single operation in a function prolog, diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 4b0fe43c18684..6a6f97ae78b04 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -373,7 +373,7 @@ def ext_ext_fold: GICombineRule < (apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }]) >; -def not_cmp_fold_matchinfo : GIDefMatchData<"Register">; +def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector">; def not_cmp_fold : GICombineRule< (defs root:$d, not_cmp_fold_matchinfo:$info), (match (wip_match_opcode G_XOR): $d, diff --git a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h index af905bbecad8f..21943616c5e1b 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h @@ -1,4 +1,4 @@ -//===--------- Definition of the HeapProfiler class ---------*- C++ -*-===// +//===--------- Definition of the HeapProfiler class -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -38,8 +38,6 @@ class ModuleHeapProfilerPass : public PassInfoMixin { public: explicit ModuleHeapProfilerPass(); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - -private: }; // Insert HeapProfiler instrumentation diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h 
b/llvm/include/llvm/Transforms/Scalar/GVN.h index f2818c6b792ec..be3804f95c3e8 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -46,11 +46,12 @@ class FunctionPass; class IntrinsicInst; class LoadInst; class LoopInfo; +class MemorySSA; +class MemorySSAUpdater; class OptimizationRemarkEmitter; class PHINode; class TargetLibraryInfo; class Value; - /// A private "module" namespace for types and utilities used by GVN. These /// are implementation details and should not be used by clients. namespace gvn LLVM_LIBRARY_VISIBILITY { @@ -211,6 +212,7 @@ class GVN : public PassInfoMixin { OptimizationRemarkEmitter *ORE = nullptr; ImplicitControlFlowTracking *ICF = nullptr; LoopInfo *LI = nullptr; + MemorySSAUpdater *MSSAU = nullptr; ValueTable VN; @@ -246,7 +248,7 @@ class GVN : public PassInfoMixin { bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr); /// Push a new Value to the LeaderTable onto the list for its value number. 
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 3e278b31910f4..89a2e24af288b 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -30,6 +30,8 @@ class Instruction; class MemCpyInst; class MemMoveInst; class MemoryDependenceResults; +class MemorySSA; +class MemorySSAUpdater; class MemSetInst; class StoreInst; class TargetLibraryInfo; @@ -41,6 +43,7 @@ class MemCpyOptPass : public PassInfoMixin { AliasAnalysis *AA = nullptr; AssumptionCache *AC = nullptr; DominatorTree *DT = nullptr; + MemorySSAUpdater *MSSAU = nullptr; public: MemCpyOptPass() = default; @@ -50,7 +53,7 @@ class MemCpyOptPass : public PassInfoMixin { // Glue for the old PM. bool runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_); + AssumptionCache *AC_, DominatorTree *DT_, MemorySSA *MSSA_); private: // Helper functions diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 5ab2dd496282f..fb6f0269a0ac2 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -199,6 +199,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr); /// branches to us and one of our successors, fold the setcc into the /// predecessor and use logical operations to pick the right destination. 
bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr, + const TargetTransformInfo *TTI = nullptr, unsigned BonusInstThreshold = 1); /// This function takes a virtual register computed by an Instruction and diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 78ae38288c0c3..77360cb2671d8 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -39,6 +39,19 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE); +/// struct for holding enough information to help calculate the cost of the +/// given SCEV when expanded into IR. +struct SCEVOperand { + explicit SCEVOperand(unsigned Opc, int Idx, const SCEV *S) : + ParentOpcode(Opc), OperandIdx(Idx), S(S) { } + /// LLVM instruction opcode that uses the operand. + unsigned ParentOpcode; + /// The use index of an expanded instruction. + int OperandIdx; + /// The SCEV operand to be costed. + const SCEV* S; +}; + /// This class uses information about analyze scalars to rewrite expressions /// in canonical form. /// @@ -220,14 +233,14 @@ class SCEVExpander : public SCEVVisitor { assert(At && "This function requires At instruction to be provided."); if (!TTI) // In assert-less builds, avoid crashing return true; // by always claiming to be high-cost. 
- SmallVector Worklist; + SmallVector Worklist; SmallPtrSet Processed; int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic; - Worklist.emplace_back(Expr); + Worklist.emplace_back(-1, -1, Expr); while (!Worklist.empty()) { - const SCEV *S = Worklist.pop_back_val(); - if (isHighCostExpansionHelper(S, L, *At, BudgetRemaining, *TTI, Processed, - Worklist)) + const SCEVOperand WorkItem = Worklist.pop_back_val(); + if (isHighCostExpansionHelper(WorkItem, L, *At, BudgetRemaining, + *TTI, Processed, Worklist)) return true; } assert(BudgetRemaining >= 0 && "Should have returned from inner loop."); @@ -394,11 +407,11 @@ class SCEVExpander : public SCEVVisitor { Value *expandCodeForImpl(const SCEV *SH, Type *Ty, Instruction *I, bool Root); /// Recursive helper function for isHighCostExpansion. - bool isHighCostExpansionHelper(const SCEV *S, Loop *L, const Instruction &At, - int &BudgetRemaining, - const TargetTransformInfo &TTI, - SmallPtrSetImpl &Processed, - SmallVectorImpl &Worklist); + bool isHighCostExpansionHelper( + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl &Processed, + SmallVectorImpl &Worklist); /// Insert the specified binary operator, doing a small amount of work to /// avoid inserting an obviously redundant operation, and hoisting to an diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index 22959c62fc81e..b1d0a703850b9 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -65,6 +65,7 @@ module LLVM_BinaryFormat { textual header "BinaryFormat/ELFRelocs/ARC.def" textual header "BinaryFormat/ELFRelocs/AVR.def" textual header "BinaryFormat/ELFRelocs/BPF.def" + textual header "BinaryFormat/ELFRelocs/CSKY.def" textual header "BinaryFormat/ELFRelocs/Hexagon.def" textual header "BinaryFormat/ELFRelocs/i386.def" textual header "BinaryFormat/ELFRelocs/Lanai.def" diff --git 
a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index a9ece42df8563..0496e23195d57 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -57,7 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoWrapperPassPass(Registry); initializeLazyValueInfoPrinterPass(Registry); initializeLegacyDivergenceAnalysisPass(Registry); - initializeLintPass(Registry); + initializeLintLegacyPassPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemDerefPrinterPass(Registry); diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 2ede4baaf6832..ca043b415b107 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -975,22 +975,14 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, return ModRefInfo::NoModRef; } - // The semantics of memcpy intrinsics forbid overlap between their respective - // operands, i.e., source and destination of any given memcpy must no-alias. - // If Loc must-aliases either one of these two locations, then it necessarily - // no-aliases the other. + // The semantics of memcpy intrinsics either exactly overlap or do not + // overlap, i.e., source and destination of any given memcpy are either + // no-alias or must-alias. if (auto *Inst = dyn_cast(Call)) { - AliasResult SrcAA, DestAA; - - if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), - Loc, AAQI)) == MustAlias) - // Loc is exactly the memcpy source thus disjoint from memcpy dest. - return ModRefInfo::Ref; - if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst), - Loc, AAQI)) == MustAlias) - // The converse case. 
- return ModRefInfo::Mod; - + AliasResult SrcAA = + getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI); + AliasResult DestAA = + getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI); // It's also possible for Loc to alias both src and dest, or neither. ModRefInfo rv = ModRefInfo::NoModRef; if (SrcAA != NoAlias) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 843f0608a963b..7c13b41bc7e64 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3965,6 +3965,15 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt && Pred == ICmpInst::ICMP_EQ) return FalseVal; + + // X == 0 ? abs(X) : -abs(X) --> -abs(X) + // X == 0 ? -abs(X) : abs(X) --> abs(X) + if (match(TrueVal, m_Intrinsic(m_Value(X))) && + match(FalseVal, m_Neg(m_Intrinsic(m_Specific(X))))) + return FalseVal; + if (match(TrueVal, m_Neg(m_Intrinsic(m_Value(X)))) && + match(FalseVal, m_Intrinsic(m_Specific(X)))) + return FalseVal; } // Check for other compares that behave like bit test. @@ -5326,6 +5335,13 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit)) return Op1; + if (Optional Imp = + isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL)) + return *Imp ? Op0 : Op1; + if (Optional Imp = + isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL)) + return *Imp ? 
Op1 : Op0; + break; } case Intrinsic::usub_with_overflow: diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 4a159d6035f0d..04e04a8053e87 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -63,6 +63,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -80,134 +81,102 @@ using namespace llvm; namespace { - namespace MemRef { - static const unsigned Read = 1; - static const unsigned Write = 2; - static const unsigned Callee = 4; - static const unsigned Branchee = 8; - } // end namespace MemRef - - class Lint : public FunctionPass, public InstVisitor { - friend class InstVisitor; - - void visitFunction(Function &F); - - void visitCallBase(CallBase &CB); - void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, - MaybeAlign Alignment, Type *Ty, unsigned Flags); - void visitEHBeginCatch(IntrinsicInst *II); - void visitEHEndCatch(IntrinsicInst *II); - - void visitReturnInst(ReturnInst &I); - void visitLoadInst(LoadInst &I); - void visitStoreInst(StoreInst &I); - void visitXor(BinaryOperator &I); - void visitSub(BinaryOperator &I); - void visitLShr(BinaryOperator &I); - void visitAShr(BinaryOperator &I); - void visitShl(BinaryOperator &I); - void visitSDiv(BinaryOperator &I); - void visitUDiv(BinaryOperator &I); - void visitSRem(BinaryOperator &I); - void visitURem(BinaryOperator &I); - void visitAllocaInst(AllocaInst &I); - void visitVAArgInst(VAArgInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitInsertElementInst(InsertElementInst &I); - void visitUnreachableInst(UnreachableInst &I); - - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, - SmallPtrSetImpl &Visited) const; - - public: - Module *Mod; - const 
DataLayout *DL; - AliasAnalysis *AA; - AssumptionCache *AC; - DominatorTree *DT; - TargetLibraryInfo *TLI; - - std::string Messages; - raw_string_ostream MessagesStr; - - static char ID; // Pass identification, replacement for typeid - Lint() : FunctionPass(ID), MessagesStr(Messages) { - initializeLintPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - void print(raw_ostream &O, const Module *M) const override {} - - void WriteValues(ArrayRef Vs) { - for (const Value *V : Vs) { - if (!V) - continue; - if (isa(V)) { - MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; - } +namespace MemRef { +static const unsigned Read = 1; +static const unsigned Write = 2; +static const unsigned Callee = 4; +static const unsigned Branchee = 8; +} // end namespace MemRef + +class Lint : public InstVisitor { + friend class InstVisitor; + + void visitFunction(Function &F); + + void visitCallBase(CallBase &CB); + void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, + MaybeAlign Alignment, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); + + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); 
+ void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSetImpl &Visited) const; + +public: + Module *Mod; + const DataLayout *DL; + AliasAnalysis *AA; + AssumptionCache *AC; + DominatorTree *DT; + TargetLibraryInfo *TLI; + + std::string Messages; + raw_string_ostream MessagesStr; + + Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, + AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) + : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), + MessagesStr(Messages) {} + + void WriteValues(ArrayRef Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, Mod); + MessagesStr << '\n'; } } + } - /// A check failed, so printout out the condition and the message. - /// - /// This provides a nice place to put a breakpoint if you want to see why - /// something is not correct. - void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } - - /// A check failed (with values to print). - /// - /// This calls the Message-only version so that the above is easier to set - /// a breakpoint on. - template - void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { - CheckFailed(Message); - WriteValues({V1, Vs...}); - } - }; + /// A check failed, so printout out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... 
Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); + } +}; } // end anonymous namespace -char Lint::ID = 0; -INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", - false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", - false, true) - // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, ...) \ - do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) - -// Lint::run - This is the main Analysis entry point for a -// function. -// -bool Lint::runOnFunction(Function &F) { - Mod = F.getParent(); - DL = &F.getParent()->getDataLayout(); - AA = &getAnalysis().getAAResults(); - AC = &getAnalysis().getAssumptionCache(F); - DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(F); - visit(F); - dbgs() << MessagesStr.str(); - Messages.clear(); - return false; -} +#define Assert(C, ...) \ + do { \ + if (!(C)) { \ + CheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a @@ -281,8 +250,7 @@ void Lint::visitCallBase(CallBase &I) { // Check that an sret argument points to valid memory. 
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - Type *Ty = - cast(Formal->getType())->getElementType(); + Type *Ty = cast(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), DL->getABITypeAlign(Ty), Ty, MemRef::Read | MemRef::Write); @@ -309,12 +277,12 @@ void Lint::visitCallBase(CallBase &I) { } } - if (IntrinsicInst *II = dyn_cast(&I)) switch (II->getIntrinsicID()) { - default: break; + default: + break; - // TODO: Check more intrinsics + // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast(&I); @@ -553,7 +521,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { - KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); + KnownBits Known = + computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); return Known.isZero(); } @@ -682,11 +651,13 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) return findValueImpl(U, OffsetOk, Visited); - if (BBI != BB->begin()) break; + if (BBI != BB->begin()) + break; BB = BB->getUniquePredecessor(); - if (!BB) break; + if (!BB) + break; BBI = BB->end(); } } else if (PHINode *PN = dyn_cast(V)) { @@ -696,8 +667,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CI->isNoopCast(*DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast(V)) { - if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), - Ex->getIndices())) + if (Value *W = + FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast(V)) { @@ -728,22 +699,75 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, return V; } +PreservedAnalyses 
LintPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &AM.getResult(F); + auto *AC = &AM.getResult(F); + auto *DT = &AM.getResult(F); + auto *TLI = &AM.getResult(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return PreservedAnalyses::all(); +} + +class LintLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LintLegacyPass() : FunctionPass(ID) { + initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + void print(raw_ostream &O, const Module *M) const override {} +}; + +char LintLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) + +bool LintLegacyPass::runOnFunction(Function &F) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &getAnalysis().getAAResults(); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DT = &getAnalysis().getDomTree(); + auto *TLI = &getAnalysis().getTLI(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return false; +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... 
//===----------------------------------------------------------------------===// -FunctionPass *llvm::createLintPass() { - return new Lint(); -} +FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); } /// lintFunction - Check a function for errors, printing messages on stderr. /// void llvm::lintFunction(const Function &f) { - Function &F = const_cast(f); + Function &F = const_cast(f); assert(!F.isDeclaration() && "Cannot lint external functions"); legacy::FunctionPassManager FPM(F.getParent()); - Lint *V = new Lint(); + auto *V = new LintLegacyPass(); FPM.add(V); FPM.run(F); } @@ -752,7 +776,7 @@ void llvm::lintFunction(const Function &f) { /// void llvm::lintModule(const Module &M) { legacy::PassManager PM; - Lint *V = new Lint(); + auto *V = new LintLegacyPass(); PM.add(V); - PM.run(const_cast(M)); + PM.run(const_cast(M)); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 9c9b9c53c939f..40d89fff04587 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6341,6 +6341,25 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Invoke: if (Value *RV = cast(U)->getReturnedArgOperand()) return getSCEV(RV); + + if (auto *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::umax: + return getUMaxExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::umin: + return getUMinExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::smax: + return getSMaxExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::smin: + return getSMinExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + default: + break; + } + } break; } diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp index f95a8918afbba..ab5f2db7d1cd9 100644 --- a/llvm/lib/Analysis/StackLifetime.cpp +++ 
b/llvm/lib/Analysis/StackLifetime.cpp @@ -292,7 +292,7 @@ LLVM_DUMP_METHOD void StackLifetime::dumpBlockLiveness() const { const BasicBlock *BB = IT.getFirst(); const BlockLifetimeInfo &BlockInfo = BlockLiveness.find(BB)->getSecond(); auto BlockRange = BlockInstRange.find(BB)->getSecond(); - dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second + dbgs() << " BB (" << BB->getName() << ") [" << BlockRange.first << ", " << BlockRange.second << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End << ", livein " << BlockInfo.LiveIn << ", liveout " << BlockInfo.LiveOut << "\n"; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b592412ed0b6f..6e5a7195bb194 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1212,59 +1212,41 @@ static void computeKnownBitsFromOperator(const Operator *I, if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); computeKnownBits(LHS, Known2, Depth + 1, Q); - } else { - computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + switch (SPF) { + default: + llvm_unreachable("Unhandled select pattern flavor!"); + case SPF_SMAX: + Known = KnownBits::smax(Known, Known2); + break; + case SPF_SMIN: + Known = KnownBits::smin(Known, Known2); + break; + case SPF_UMAX: + Known = KnownBits::umax(Known, Known2); + break; + case SPF_UMIN: + Known = KnownBits::umin(Known, Known2); + break; + } + break; } - unsigned MaxHighOnes = 0; - unsigned MaxHighZeros = 0; - if (SPF == SPF_SMAX) { - // If both sides are negative, the result is negative. - if (Known.isNegative() && Known2.isNegative()) - // We can derive a lower bound on the result by taking the max of the - // leading one bits. - MaxHighOnes = - std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); - // If either side is non-negative, the result is non-negative. 
- else if (Known.isNonNegative() || Known2.isNonNegative()) - MaxHighZeros = 1; - } else if (SPF == SPF_SMIN) { - // If both sides are non-negative, the result is non-negative. - if (Known.isNonNegative() && Known2.isNonNegative()) - // We can derive an upper bound on the result by taking the max of the - // leading zero bits. - MaxHighZeros = std::max(Known.countMinLeadingZeros(), - Known2.countMinLeadingZeros()); - // If either side is negative, the result is negative. - else if (Known.isNegative() || Known2.isNegative()) - MaxHighOnes = 1; - } else if (SPF == SPF_UMAX) { - // We can derive a lower bound on the result by taking the max of the - // leading one bits. - MaxHighOnes = - std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); - } else if (SPF == SPF_UMIN) { - // We can derive an upper bound on the result by taking the max of the - // leading zero bits. - MaxHighZeros = - std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); - } else if (SPF == SPF_ABS) { + computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + + if (SPF == SPF_ABS) { // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. if (match(RHS, m_Neg(m_Specific(LHS))) && Q.IIQ.hasNoSignedWrap(cast(RHS))) - MaxHighZeros = 1; + Known.Zero.setSignBit(); } - // Only known if known in both the LHS and RHS. 
- Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - if (MaxHighOnes > 0) - Known.One.setHighBits(MaxHighOnes); - if (MaxHighZeros > 0) - Known.Zero.setHighBits(MaxHighZeros); break; } case Instruction::FPTrunc: @@ -2413,8 +2395,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well // as casts that can alter the value, e.g., AddrSpaceCasts. if (const GEPOperator *GEP = dyn_cast(V)) - if (isGEPKnownNonNull(GEP, Depth, Q)) - return true; + return isGEPKnownNonNull(GEP, Depth, Q); if (auto *BCO = dyn_cast(V)) return isKnownNonZero(BCO->getOperand(0), Depth, Q); @@ -2568,11 +2549,13 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, const Value *Vec = EEI->getVectorOperand(); const Value *Idx = EEI->getIndexOperand(); auto *CIdx = dyn_cast(Idx); - unsigned NumElts = cast(Vec->getType())->getNumElements(); - APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); - if (CIdx && CIdx->getValue().ult(NumElts)) - DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); - return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + if (auto *VecTy = dyn_cast(Vec->getType())) { + unsigned NumElts = VecTy->getNumElements(); + APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); + if (CIdx && CIdx->getValue().ult(NumElts)) + DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); + return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + } } KnownBits Known(BitWidth); diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 0bc8b7281d91e..e241300dd2e7c 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -342,7 +342,7 @@ int llvm::getSplatIndex(ArrayRef Mask) { /// This function is not fully general. 
It checks only 2 cases: /// the input value is (1) a splat constant vector or (2) a sequence /// of instructions that broadcasts a scalar at element 0. -const llvm::Value *llvm::getSplatValue(const Value *V) { +Value *llvm::getSplatValue(const Value *V) { if (isa(V->getType())) if (auto *C = dyn_cast(V)) return C->getSplatValue(); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 538107cecd8b3..57bf500ba8923 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -547,22 +548,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as - // that might give surprising results. - std::vector RestrRegs; + // that might lead to undefined behaviour. + SmallVector RestrRegs; + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Start with the first operand descriptor, and iterate over them. for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands(); I < NumOps; ++I) { const MachineOperand &MO = MI->getOperand(I); - if (MO.isImm()) { - unsigned Flags = MO.getImm(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber && - !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) { - RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg())); - } - // Skip to one before the next operand descriptor, if it exists. 
- I += InlineAsm::getNumOperandRegisters(Flags); + if (!MO.isImm()) + continue; + unsigned Flags = MO.getImm(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) { + Register Reg = MI->getOperand(I + 1).getReg(); + if (!TRI->isAsmClobberable(*MF, Reg)) + RestrRegs.push_back(Reg); } + // Skip to one before the next operand descriptor, if it exists. + I += InlineAsm::getNumOperandRegisters(Flags); } if (!RestrRegs.empty()) { @@ -572,14 +574,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); std::string Msg = "inline asm clobber list contains reserved registers: "; - for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) { if(I != RestrRegs.begin()) Msg += ", "; - Msg += *I; + Msg += TRI->getName(*I); } - std::string Note = "Reserved registers on the clobber list may not be " - "preserved across the asm statement, and clobbering them may " - "lead to undefined behaviour."; + const char *Note = + "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index ceed1fe6e3bd5..40c741077d1ad 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1013,8 +1013,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) + // Ignore the size if it's a non-enum forward decl. // TODO: Do we care about size for enum forward declarations? 
- if (Size) + if (Size && + (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 40512013e4e10..617692a347922 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -136,6 +136,7 @@ add_llvm_component_library(LLVMCodeGen RegisterPressure.cpp RegisterScavenging.cpp RenameIndependentSubregs.cpp + MachineStableHash.cpp MIRVRegNamerUtils.cpp MIRNamerPass.cpp MIRCanonicalizerPass.cpp diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index 3d8c2c8b00aa7..9662a583e3694 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -184,6 +185,11 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { } } +void CCState::ensureMaxAlignment(Align Alignment) { + if (!AnalyzingMustTailForwardedRegs) + MF.getFrameInfo().ensureMaxAlignment(Alignment); +} + static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { if (VT.isVector()) return true; // Assume -msse-regparm might be in effect. diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 3272f36a14360..9a4ed2fab608b 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5314,88 +5314,112 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, /// zero index. 
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr) { - const GetElementPtrInst *GEP = dyn_cast(Ptr); - if (!GEP || !GEP->hasIndices()) + // FIXME: Support scalable vectors. + if (isa(Ptr->getType())) return false; - // If the GEP and the gather/scatter aren't in the same BB, don't optimize. - // FIXME: We should support this by sinking the GEP. - if (MemoryInst->getParent() != GEP->getParent()) - return false; - - SmallVector Ops(GEP->op_begin(), GEP->op_end()); + Value *NewAddr; - bool RewriteGEP = false; + if (const auto *GEP = dyn_cast(Ptr)) { + // Don't optimize GEPs that don't have indices. + if (!GEP->hasIndices()) + return false; - if (Ops[0]->getType()->isVectorTy()) { - Ops[0] = const_cast(getSplatValue(Ops[0])); - if (!Ops[0]) + // If the GEP and the gather/scatter aren't in the same BB, don't optimize. + // FIXME: We should support this by sinking the GEP. + if (MemoryInst->getParent() != GEP->getParent()) return false; - RewriteGEP = true; - } - unsigned FinalIndex = Ops.size() - 1; + SmallVector Ops(GEP->op_begin(), GEP->op_end()); - // Ensure all but the last index is 0. - // FIXME: This isn't strictly required. All that's required is that they are - // all scalars or splats. - for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast(Ops[i]); - if (!C) - return false; - if (isa(C->getType())) - C = C->getSplatValue(); - auto *CI = dyn_cast_or_null(C); - if (!CI || !CI->isZero()) - return false; - // Scalarize the index if needed. - Ops[i] = CI; - } - - // Try to scalarize the final index. - if (Ops[FinalIndex]->getType()->isVectorTy()) { - if (Value *V = const_cast(getSplatValue(Ops[FinalIndex]))) { - auto *C = dyn_cast(V); - // Don't scalarize all zeros vector. 
- if (!C || !C->isZero()) { - Ops[FinalIndex] = V; - RewriteGEP = true; - } + bool RewriteGEP = false; + + if (Ops[0]->getType()->isVectorTy()) { + Ops[0] = getSplatValue(Ops[0]); + if (!Ops[0]) + return false; + RewriteGEP = true; } - } - // If we made any changes or the we have extra operands, we need to generate - // new instructions. - if (!RewriteGEP && Ops.size() == 2) - return false; + unsigned FinalIndex = Ops.size() - 1; - unsigned NumElts = cast(Ptr->getType())->getNumElements(); + // Ensure all but the last index is 0. + // FIXME: This isn't strictly required. All that's required is that they are + // all scalars or splats. + for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast(Ops[i]); + if (!C) + return false; + if (isa(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null(C); + if (!CI || !CI->isZero()) + return false; + // Scalarize the index if needed. + Ops[i] = CI; + } + + // Try to scalarize the final index. + if (Ops[FinalIndex]->getType()->isVectorTy()) { + if (Value *V = getSplatValue(Ops[FinalIndex])) { + auto *C = dyn_cast(V); + // Don't scalarize all zeros vector. + if (!C || !C->isZero()) { + Ops[FinalIndex] = V; + RewriteGEP = true; + } + } + } - IRBuilder<> Builder(MemoryInst); + // If we made any changes or the we have extra operands, we need to generate + // new instructions. + if (!RewriteGEP && Ops.size() == 2) + return false; - Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); + unsigned NumElts = cast(Ptr->getType())->getNumElements(); - Value *NewAddr; + IRBuilder<> Builder(MemoryInst); - // If the final index isn't a vector, emit a scalar GEP containing all ops - // and a vector GEP with all zeroes final index. 
- if (!Ops[FinalIndex]->getType()->isVectorTy()) { - NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); - auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); - NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); - } else { - Value *Base = Ops[0]; - Value *Index = Ops[FinalIndex]; + Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); - // Create a scalar GEP if there are more than 2 operands. - if (Ops.size() != 2) { - // Replace the last index with 0. - Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); - Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + // If the final index isn't a vector, emit a scalar GEP containing all ops + // and a vector GEP with all zeroes final index. + if (!Ops[FinalIndex]->getType()->isVectorTy()) { + NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); + auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); + } else { + Value *Base = Ops[0]; + Value *Index = Ops[FinalIndex]; + + // Create a scalar GEP if there are more than 2 operands. + if (Ops.size() != 2) { + // Replace the last index with 0. + Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); + Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + } + + // Now create the GEP with scalar pointer and vector index. + NewAddr = Builder.CreateGEP(Base, Index); } + } else if (!isa(Ptr)) { + // Not a GEP, maybe its a splat and we can create a GEP to enable + // SelectionDAGBuilder to use it as a uniform base. + Value *V = getSplatValue(Ptr); + if (!V) + return false; + + unsigned NumElts = cast(Ptr->getType())->getNumElements(); + + IRBuilder<> Builder(MemoryInst); - // Now create the GEP with scalar pointer and vector index. - NewAddr = Builder.CreateGEP(Base, Index); + // Emit a vector GEP with a scalar pointer and all 0s vector index. 
+ Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); + auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy)); + } else { + // Constant, SelectionDAGBuilder knows to check if it's a splat. + return false; } MemoryInst->replaceUsesOfWith(Ptr, NewAddr); diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 517b2cd25fc48..a580d3cc5785c 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -866,8 +866,8 @@ bool EarlyIfConverter::shouldConvertIf() { // by inserting select instructions. MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); struct CriticalPathInfo { - unsigned Extra; //< Count of extra cycles that the component adds. - unsigned Depth; //< Absolute depth of the component in cycles. + unsigned Extra; // Count of extra cycles that the component adds. + unsigned Depth; // Absolute depth of the component in cycles. }; CriticalPathInfo Cond{}; CriticalPathInfo TBlock{}; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 5dbd09670feaf..10cd58f17e9aa 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -697,7 +697,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, return false; LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); - + // FIXME: The following use traversal needs a bail out for pathological cases.
for (auto &Use : MRI.use_nodbg_instructions(Base)) { if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; @@ -824,6 +824,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; + // For now, no targets actually support these opcodes so don't waste time + // running these unless we're forced to for testing. + if (!ForceLegalIndexing) + return false; + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, MatchInfo.Offset); if (!MatchInfo.IsPre && @@ -2238,13 +2243,13 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, isConstTrueVal(TLI, Cst, IsVector, IsFP); } -bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) { +bool CombinerHelper::matchNotCmp(MachineInstr &MI, + SmallVectorImpl &RegsToNegate) { assert(MI.getOpcode() == TargetOpcode::G_XOR); LLT Ty = MRI.getType(MI.getOperand(0).getReg()); const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); Register XorSrc; Register CstReg; - int64_t Cst; // We match xor(src, true) here. if (!mi_match(MI.getOperand(0).getReg(), MRI, m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) @@ -2253,15 +2258,51 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) { if (!MRI.hasOneNonDBGUse(XorSrc)) return false; - // Now try match src to either icmp or fcmp. + // Check that XorSrc is the root of a tree of comparisons combined with ANDs + // and ORs. The suffix of RegsToNegate starting from index I is used a work + // list of tree nodes to visit. + RegsToNegate.push_back(XorSrc); + // Remember whether the comparisons are all integer or all floating point. + bool IsInt = false; bool IsFP = false; - if (!mi_match(XorSrc, MRI, m_GICmp(m_Pred(), m_Reg(), m_Reg()))) { - // Try fcmp. 
- if (!mi_match(XorSrc, MRI, m_GFCmp(m_Pred(), m_Reg(), m_Reg()))) + for (unsigned I = 0; I < RegsToNegate.size(); ++I) { + Register Reg = RegsToNegate[I]; + if (!MRI.hasOneNonDBGUse(Reg)) return false; - IsFP = true; + MachineInstr *Def = MRI.getVRegDef(Reg); + switch (Def->getOpcode()) { + default: + // Don't match if the tree contains anything other than ANDs, ORs and + // comparisons. + return false; + case TargetOpcode::G_ICMP: + if (IsFP) + return false; + IsInt = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_FCMP: + if (IsInt) + return false; + IsFP = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + // Implement De Morgan's laws: + // ~(x & y) -> ~x | ~y + // ~(x | y) -> ~x & ~y + // When we apply the combine we will change the opcode and recursively + // negate the operands. + RegsToNegate.push_back(Def->getOperand(1).getReg()); + RegsToNegate.push_back(Def->getOperand(2).getReg()); + break; + } } + // Now we know whether the comparisons are integer or floating point, check + // the constant in the xor. 
+ int64_t Cst; if (Ty.isVector()) { MachineInstr *CstDef = MRI.getVRegDef(CstReg); auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); @@ -2276,25 +2317,38 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) { return false; } - CmpReg = XorSrc; return true; } -bool CombinerHelper::applyNotCmp(MachineInstr &MI, Register &CmpReg) { - MachineInstr *CmpDef = MRI.getVRegDef(CmpReg); - assert(CmpDef && "Should have been given an MI reg"); - assert(CmpDef->getOpcode() == TargetOpcode::G_ICMP || - CmpDef->getOpcode() == TargetOpcode::G_FCMP); - - Observer.changingInstr(*CmpDef); - MachineOperand &PredOp = CmpDef->getOperand(1); - CmpInst::Predicate NewP = CmpInst::getInversePredicate( - (CmpInst::Predicate)PredOp.getPredicate()); - PredOp.setPredicate(NewP); - Observer.changedInstr(*CmpDef); +bool CombinerHelper::applyNotCmp(MachineInstr &MI, + SmallVectorImpl &RegsToNegate) { + for (Register Reg : RegsToNegate) { + MachineInstr *Def = MRI.getVRegDef(Reg); + Observer.changingInstr(*Def); + // For each comparison, invert the opcode. For each AND and OR, change the + // opcode. 
+ switch (Def->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + MachineOperand &PredOp = Def->getOperand(1); + CmpInst::Predicate NewP = CmpInst::getInversePredicate( + (CmpInst::Predicate)PredOp.getPredicate()); + PredOp.setPredicate(NewP); + break; + } + case TargetOpcode::G_AND: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); + break; + case TargetOpcode::G_OR: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + break; + } + Observer.changedInstr(*Def); + } - replaceRegWith(MRI, MI.getOperand(0).getReg(), - CmpDef->getOperand(0).getReg()); + replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index c615462af407e..3ebbac9fd659a 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -308,11 +308,24 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known, DemandedElts, Depth + 1); break; } - case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMIN: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smin(Known, KnownRHS); + break; + } case TargetOpcode::G_SMAX: { // TODO: Handle clamp pattern with number of sign bits - computeKnownBitsMin(MI.getOperand(1).getReg(), MI.getOperand(2).getReg(), - Known, DemandedElts, Depth + 1); + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smax(Known, KnownRHS); break; } case TargetOpcode::G_UMIN: { @@ 
-321,13 +334,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, DemandedElts, Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - - // UMIN - we know that the result will have the maximum of the - // known zero leading bits of the inputs. - unsigned LeadZero = Known.countMinLeadingZeros(); - LeadZero = std::max(LeadZero, KnownRHS.countMinLeadingZeros()); - Known &= KnownRHS; - Known.Zero.setHighBits(LeadZero); + Known = KnownBits::umin(Known, KnownRHS); break; } case TargetOpcode::G_UMAX: { @@ -336,14 +343,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, DemandedElts, Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - - // UMAX - we know that the result will have the maximum of the - // known one leading bits of the inputs. - unsigned LeadOne = Known.countMinLeadingOnes(); - LeadOne = std::max(LeadOne, KnownRHS.countMinLeadingOnes()); - Known.Zero &= KnownRHS.Zero; - Known.One &= KnownRHS.One; - Known.One.setHighBits(LeadOne); + Known = KnownBits::umax(Known, KnownRHS); break; } case TargetOpcode::G_FCMP: diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 303f2d8417b57..6f8d233043e70 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -13,7 +13,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" 
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 16c9bfc672af5..dc1b0a867b0d6 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -200,6 +200,13 @@ class ImplicitNullChecks : public MachineFunctionPass { unsigned PointerReg, ArrayRef PrevInsts); + /// Returns true if \p DependenceMI can clobber the liveIns in NullSucc block + /// if it was hoisted to the NullCheck block. This is used by caller + /// canHoistInst to decide if DependenceMI can be hoisted safely. + bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI, + MachineBasicBlock *NullSucc, + unsigned PointerReg); + /// Return true if \p FaultingMI can be hoisted from after the /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a /// non-null value if we also need to (and legally can) hoist a depedency. @@ -368,18 +375,26 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) || - !BaseOp->isReg() || BaseOp->getReg() != PointerReg) + // FIXME: This handles only simple addressing mode. + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return SR_Unsuitable; + + // We need the base of the memory instruction to be same as the register + // where the null check is performed (i.e. PointerReg). + if (!BaseOp->isReg() || BaseOp->getReg() != PointerReg) return SR_Unsuitable; - // FIXME: This algorithm assumes instructions have fixed-size offsets. + // Scalable offsets are a part of scalable vectors (SVE for AArch64). That + // target is in-practice unsupported for ImplicitNullChecks. 
if (OffsetIsScalable) return SR_Unsuitable; + if (!MI.mayLoadOrStore() || MI.isPredicable()) + return SR_Unsuitable; + // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. - if (!(MI.mayLoadOrStore() && !MI.isPredicable() && - -PageSize < Offset && Offset < PageSize)) + if (!(-PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. @@ -393,32 +408,9 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, return SR_Suitable; } -bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, - unsigned PointerReg, - ArrayRef InstsSeenSoFar, - MachineBasicBlock *NullSucc, - MachineInstr *&Dependence) { - auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar); - if (!DepResult.CanReorder) - return false; - - if (!DepResult.PotentialDependence) { - Dependence = nullptr; - return true; - } - - auto DependenceItr = *DepResult.PotentialDependence; - auto *DependenceMI = *DependenceItr; - - // We don't want to reason about speculating loads. Note -- at this point - // we should have already filtered out all of the other non-speculatable - // things, like calls and stores. - // We also do not want to hoist stores because it might change the memory - // while the FaultingMI may result in faulting. - assert(canHandle(DependenceMI) && "Should never have reached here!"); - if (DependenceMI->mayLoadOrStore()) - return false; - +bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns( + MachineInstr *DependenceMI, MachineBasicBlock *NullSucc, + unsigned PointerReg) { for (auto &DependenceMO : DependenceMI->operands()) { if (!(DependenceMO.isReg() && DependenceMO.getReg())) continue; @@ -441,7 +433,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, // same as it would have been had the load not have executed and we'd have // branched to NullSucc directly. 
if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg())) - return false; + return true; // The Dependency can't be re-defining the base register -- then we won't // get the memory operation on the address we want. This is already @@ -451,6 +443,39 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, "Should have been checked before!"); } + // The dependence does not clobber live-ins in NullSucc block. + return false; +} + +bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, + unsigned PointerReg, + ArrayRef InstsSeenSoFar, + MachineBasicBlock *NullSucc, + MachineInstr *&Dependence) { + auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar); + if (!DepResult.CanReorder) + return false; + + if (!DepResult.PotentialDependence) { + Dependence = nullptr; + return true; + } + + auto DependenceItr = *DepResult.PotentialDependence; + auto *DependenceMI = *DependenceItr; + + // We don't want to reason about speculating loads. Note -- at this point + // we should have already filtered out all of the other non-speculatable + // things, like calls and stores. + // We also do not want to hoist stores because it might change the memory + // while the FaultingMI may result in faulting. 
+ assert(canHandle(DependenceMI) && "Should never have reached here!"); + if (DependenceMI->mayLoadOrStore()) + return false; + + if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc, PointerReg)) + return false; + auto DepDepResult = computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr}); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index a0e85c82868fb..cfaec85d3f3dd 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1313,7 +1313,6 @@ class InstrRefBasedLDV : public LDVImpl { bool transferSpillOrRestoreInst(MachineInstr &MI); /// Examines \p MI for any registers that it defines, and notifies trackers. - /// \returns true if MI was recognized and processed. void transferRegisterDef(MachineInstr &MI); /// Copy one location to the other, accounting for movement of subregisters diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index dd0a900fc2be0..97cc7a0c30343 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1444,10 +1444,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { static_cast(pImpl)->emitDebugValues(VRM); } -bool LiveDebugVariables::doInitialization(Module &M) { - return Pass::doInitialization(M); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { if (pImpl) diff --git a/llvm/lib/CodeGen/LiveDebugVariables.h b/llvm/lib/CodeGen/LiveDebugVariables.h index 74e738ec3e568..07dd3a83866fd 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/llvm/lib/CodeGen/LiveDebugVariables.h @@ -56,7 +56,6 @@ class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const 
override; - bool doInitialization(Module &) override; }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index 36b863178b474..0afdee45cda76 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 54441301d65b1..3d4f66f311749 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -8,6 +8,7 @@ #include "MIRVRegNamerUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" @@ -15,6 +16,11 @@ using namespace llvm; #define DEBUG_TYPE "mir-vregnamer-utils" +static cl::opt + UseStableNamerHash("mir-vreg-namer-use-stable-hash", cl::init(false), + cl::Hidden, + cl::desc("Use Stable Hashing for MIR VReg Renaming")); + using VRegRenameMap = std::map; bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) { @@ -52,6 +58,14 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { std::string S; raw_string_ostream OS(S); + if (UseStableNamerHash) { + auto Hash = stableHashValue(MI, /* HashVRegs */ true, + /* HashConstantPoolIndices */ true, + /* HashMemOperands */ true); + assert(Hash && "Expected non-zero Hash"); + return std::to_string(Hash).substr(0, 5); + } + // Gets a hashable artifact from a given MachineOperand (ie an unsigned). 
auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned { switch (MO.getType()) { diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index cece914d2eb17..76b69dfdcf718 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -153,22 +153,25 @@ void MachineOperand::removeRegFromUses() { /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. -void MachineOperand::ChangeToImmediate(int64_t ImmVal) { +void MachineOperand::ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); removeRegFromUses(); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; + setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm, + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); removeRegFromUses(); OpKind = MO_FPImmediate; Contents.CFP = FPImm; + setTargetFlags(TargetFlags); } void MachineOperand::ChangeToES(const char *SymName, @@ -197,7 +200,7 @@ void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an MCSymbol"); @@ -205,9 +208,10 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { OpKind = MO_MCSymbol; Contents.Sym = Sym; + setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToFrameIndex(int Idx) { +void MachineOperand::ChangeToFrameIndex(int Idx, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a FrameIndex"); @@ -215,6 +219,7 @@ 
void MachineOperand::ChangeToFrameIndex(int Idx) { OpKind = MO_FrameIndex; setIndex(Idx); + setTargetFlags(TargetFlags); } void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, @@ -415,6 +420,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { return nullptr; } +const char *MachineOperand::getTargetIndexName() const { + const MachineFunction *MF = getMFIfAvailable(*this); + return MF ? ::getTargetIndexName(*MF, this->getIndex()) : nullptr; +} + static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); for (const auto &I : Flags) { @@ -823,7 +833,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "target-index("; const char *Name = ""; if (const MachineFunction *MF = getMFIfAvailable(*this)) - if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex())) + if (const auto *TargetIndexName = ::getTargetIndexName(*MF, getIndex())) Name = TargetIndexName; OS << Name << ')'; printOperandOffset(OS, getOffset()); diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp new file mode 100644 index 0000000000000..fb14f0a33209f --- /dev/null +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -0,0 +1,194 @@ +//===- lib/CodeGen/MachineStableHash.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stable hashing for MachineInstr and MachineOperand. Useful or getting a +// hash across runs, modules, etc. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineStableHash.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MIRFormatter.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StableHashing.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "machine-stable-hash" + +using namespace llvm; + +STATISTIC(StableHashBailingMachineBasicBlock, + "Number of encountered unsupported MachineOperands that were " + "MachineBasicBlocks while computing stable hashes"); +STATISTIC(StableHashBailingConstantPoolIndex, + "Number of encountered unsupported MachineOperands that were " + "ConstantPoolIndex while computing stable hashes"); +STATISTIC(StableHashBailingTargetIndexNoName, + "Number of encountered unsupported MachineOperands that were " + "TargetIndex with no name"); +STATISTIC(StableHashBailingGlobalAddress, + "Number of encountered unsupported MachineOperands that were " + "GlobalAddress while computing stable hashes"); +STATISTIC(StableHashBailingBlockAddress, + "Number of encountered unsupported MachineOperands that were " + "BlockAddress while computing stable hashes"); +STATISTIC(StableHashBailingMetadataUnsupported, + "Number of encountered unsupported 
MachineOperands that were " + "Metadata of an unsupported kind while computing stable hashes"); + +stable_hash llvm::stableHashValue(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (Register::isVirtualRegister(MO.getReg())) { + const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo(); + return MRI.getVRegDef(MO.getReg())->getOpcode(); + } + + // Register operands don't have target flags. + return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), + MO.isDef()); + case MachineOperand::MO_Immediate: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + case MachineOperand::MO_FPImmediate: { + auto Val = MO.isCImm() ? MO.getCImm()->getValue() + : MO.getFPImm()->getValueAPF().bitcastToAPInt(); + auto ValHash = + stable_hash_combine_array(Val.getRawData(), Val.getNumWords()); + return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash); + } + + case MachineOperand::MO_MachineBasicBlock: + StableHashBailingMachineBasicBlock++; + return 0; + case MachineOperand::MO_ConstantPoolIndex: + StableHashBailingConstantPoolIndex++; + return 0; + case MachineOperand::MO_BlockAddress: + StableHashBailingBlockAddress++; + return 0; + case MachineOperand::MO_Metadata: + StableHashBailingMetadataUnsupported++; + return 0; + case MachineOperand::MO_GlobalAddress: + StableHashBailingGlobalAddress++; + return 0; + case MachineOperand::MO_TargetIndex: { + if (const char *Name = MO.getTargetIndexName()) + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_string(Name), + MO.getOffset()); + StableHashBailingTargetIndexNoName++; + return 0; + } + + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_JumpTableIndex: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIndex()); + + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), 
+ stable_hash_combine_string(MO.getSymbolName())); + + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + + case MachineOperand::MO_ShuffleMask: { + std::vector ShuffleMaskHashes; + + llvm::transform( + MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes), + [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); }); + + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_array(ShuffleMaskHashes.data(), + ShuffleMaskHashes.size())); + } + case MachineOperand::MO_MCSymbol: { + auto SymbolName = MO.getMCSymbol()->getName(); + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_string(SymbolName)); + } + case MachineOperand::MO_CFIIndex: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getPredicate()); + } + llvm_unreachable("Invalid machine operand type"); +} + +/// A stable hash value for machine instructions. +/// Returns 0 if no stable hash could be computed. +/// The hashing and equality testing functions ignore definitions so this is +/// useful for CSE, etc. +stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, + bool HashConstantPoolIndices, + bool HashMemOperands) { + // Build up a buffer of hash code components. + SmallVector HashComponents; + HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2); + HashComponents.push_back(MI.getOpcode()); + HashComponents.push_back(MI.getFlags()); + for (const MachineOperand &MO : MI.operands()) { + if (!HashVRegs && MO.isReg() && MO.isDef() && + Register::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. 
+ + if (MO.isCPI()) { + HashComponents.push_back(stable_hash_combine( + MO.getType(), MO.getTargetFlags(), MO.getIndex())); + continue; + } + + stable_hash StableHash = stableHashValue(MO); + if (!StableHash) + return 0; + HashComponents.push_back(StableHash); + } + + for (const auto *Op : MI.memoperands()) { + if (!HashMemOperands) + break; + HashComponents.push_back(static_cast(Op->getSize())); + HashComponents.push_back(static_cast(Op->getFlags())); + HashComponents.push_back(static_cast(Op->getOffset())); + HashComponents.push_back(static_cast(Op->getOrdering())); + HashComponents.push_back(static_cast(Op->getAddrSpace())); + HashComponents.push_back(static_cast(Op->getSyncScopeID())); + HashComponents.push_back(static_cast(Op->getBaseAlign().value())); + HashComponents.push_back(static_cast(Op->getFailureOrdering())); + } + + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp index bae96eb84521a..a3ae099199845 100644 --- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -8,9 +8,9 @@ #include "PHIEliminationUtils.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" + using namespace llvm; // findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 286d54386357f..37d8cdd695445 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7011,12 +7011,15 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // Check if the offsets line up for the native data layout of this target. 
bool NeedBswap = false; + bool NeedRotate = false; if (!checkOffsets(Layout.isLittleEndian())) { // Special-case: check if byte offsets line up for the opposite endian. - // TODO: We could use rotates for 16/32-bit merge pairs. - if (NarrowNumBits != 8 || !checkOffsets(Layout.isBigEndian())) + if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian())) + NeedBswap = true; + else if (NumStores == 2 && checkOffsets(Layout.isBigEndian())) + NeedRotate = true; + else return SDValue(); - NeedBswap = true; } SDLoc DL(N); @@ -7026,11 +7029,16 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue); } - // Before legalize we can introduce illegal bswaps which will be later + // Before legalize we can introduce illegal bswaps/rotates which will be later // converted to an explicit bswap sequence. This way we end up with a single // store and byte shuffling instead of several stores and byte shuffling. - if (NeedBswap) + if (NeedBswap) { SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue); + } else if (NeedRotate) { + assert(WideNumBits % 2 == 0 && "Unexpected type for rotate"); + SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT); + SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt); + } SDValue NewStore = DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e8cc916593fbc..34c563672753d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -986,8 +986,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVSETCC(const SDNode *N); - //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp 
//===--------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 82850f15feeef..2350248626c71 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -2029,7 +2030,9 @@ SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { int StackID = 0; if (Bytes.isScalable()) StackID = TFI->getStackIDForScalableVectors(); - int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, + // The stack id gives an indication of whether the object is scalable or + // not, so it's safe to pass in the minimum size here. + int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment, false, nullptr, StackID); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } @@ -3387,29 +3390,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::UMIN: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - // UMIN - we know that the result will have the maximum of the - // known zero leading bits of the inputs. 
- unsigned LeadZero = Known.countMinLeadingZeros(); - LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); - - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; - Known.Zero.setHighBits(LeadZero); + Known = KnownBits::umin(Known, Known2); break; } case ISD::UMAX: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - // UMAX - we know that the result will have the maximum of the - // known one leading bits of the inputs. - unsigned LeadOne = Known.countMinLeadingOnes(); - LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); - - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; - Known.One.setHighBits(LeadOne); + Known = KnownBits::umax(Known, Known2); break; } case ISD::SMIN: @@ -3443,12 +3430,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } } - // Fallback - just get the shared known bits of the operands. Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Known.isUnknown()) break; // Early-out Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; + if (IsMax) + Known = KnownBits::smax(Known, Known2); + else + Known = KnownBits::smin(Known, Known2); break; } case ISD::FrameIndex: @@ -6049,7 +6037,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SrcAlign = Alignment; assert(SrcAlign && "SrcAlign must be set"); ConstantDataArraySlice Slice; - bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); + // If marked as volatile, perform a copy even when marked as constant. + bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? 
~0U : TLI.getMaxStoresPerMemcpy(OptSize); const MemOp Op = isZeroConstant diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1a2c77974c2b9..5e6cb03f3839c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1128,6 +1128,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { // TODO: We could handle all flags (nsw, etc) here. // TODO: If an IR instruction maps to >1 node, only the final node will have // flags set. + // TODO: The handling of flags should be improved, see + // https://reviews.llvm.org/D86871 if (SDNode *Node = getNodeForIRValue(&I)) { SDNodeFlags IncomingFlags; IncomingFlags.copyFMF(*FPMO); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 60c0c20ffacdd..ae98edb74466d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2022,10 +2022,14 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::EXTRACT_VECTOR_ELT: { SDValue Src = Op.getOperand(0); SDValue Idx = Op.getOperand(1); - unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount(); unsigned EltBitWidth = Src.getScalarValueSizeInBits(); + if (SrcEltCnt.isScalable()) + return false; + // Demand the bits from every vector element without a constant index. 
+ unsigned NumSrcElts = SrcEltCnt.getFixedValue(); APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); if (auto *CIdx = dyn_cast(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) @@ -2603,13 +2607,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero, TLO, Depth + 1)) return true; - KnownUndef.clearBit(Idx); - if (Scl.isUndef()) - KnownUndef.setBit(Idx); + KnownUndef.setBitVal(Idx, Scl.isUndef()); - KnownZero.clearBit(Idx); - if (isNullConstant(Scl) || isNullFPConstant(Scl)) - KnownZero.setBit(Idx); + KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl)); break; } diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index e246c2e5f55cb..3d961af8ec3e3 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. 
Optional MemLoc = MemoryLocation::getOrNone(I); - if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize) + if (MemLoc.hasValue() && MemLoc->Size.hasValue() && + MemLoc->Size.getValue() > AllocSize) return true; switch (I->getOpcode()) { case Instruction::Store: diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 078c9691f8dc4..12745747f5f80 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 88f118bb05e3e..d31c358798211 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -530,7 +530,13 @@ void DWARFContext::dump( DataExtractor StrData(Section, isLittleEndian(), 0); uint64_t Offset = 0; uint64_t StrOffset = 0; - while (const char *CStr = StrData.getCStr(&Offset)) { + while (StrData.isValidOffset(Offset)) { + Error Err = Error::success(); + const char *CStr = StrData.getCStr(&Offset, &Err); + if (Err) { + DumpOpts.WarningHandler(std::move(Err)); + return; + } OS << format("0x%8.8" PRIx64 ": \"", StrOffset); OS.write_escaped(CStr); OS << "\"\n"; diff --git a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp b/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp index 18de5b616eec8..d85f3c38feb9d 100644 --- a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp @@ -13,12 +13,14 @@ namespace orc { Expected> 
TPCDynamicLibrarySearchGenerator::Load(TargetProcessControl &TPC, - const char *LibraryPath) { + const char *LibraryPath, + SymbolPredicate Allow) { auto Handle = TPC.loadDylib(LibraryPath); if (!Handle) return Handle.takeError(); - return std::make_unique(TPC, *Handle); + return std::make_unique(TPC, *Handle, + std::move(Allow)); } Error TPCDynamicLibrarySearchGenerator::tryToGenerate( @@ -28,22 +30,38 @@ Error TPCDynamicLibrarySearchGenerator::tryToGenerate( if (Symbols.empty()) return Error::success(); + SymbolLookupSet LookupSymbols; + + for (auto &KV : Symbols) { + // Skip symbols that don't match the filter. + if (Allow && !Allow(KV.first)) + continue; + LookupSymbols.add(KV.first, SymbolLookupFlags::WeaklyReferencedSymbol); + } + SymbolMap NewSymbols; - TargetProcessControl::LookupRequestElement Request(H, Symbols); + TargetProcessControl::LookupRequestElement Request(H, LookupSymbols); auto Result = TPC.lookupSymbols(Request); if (!Result) return Result.takeError(); assert(Result->size() == 1 && "Results for more than one library returned"); - assert(Result->front().size() == Symbols.size() && + assert(Result->front().size() == LookupSymbols.size() && "Result has incorrect number of elements"); + SymbolNameVector MissingSymbols; auto ResultI = Result->front().begin(); - for (auto &KV : Symbols) - NewSymbols[KV.first] = - JITEvaluatedSymbol(*ResultI++, JITSymbolFlags::Exported); + for (auto &KV : LookupSymbols) + if (*ResultI) + NewSymbols[KV.first] = + JITEvaluatedSymbol(*ResultI++, JITSymbolFlags::Exported); + + // If there were no resolved symbols bail out. + if (NewSymbols.empty()) + return Error::success(); + // Define resolved symbols. 
return JD.define(absoluteSymbols(std::move(NewSymbols))); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp index 59c9ce2393c92..1e7736d1f40db 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp @@ -78,14 +78,14 @@ SelfTargetProcessControl::lookupSymbols(LookupRequest Request) { auto &Sym = KV.first; std::string Tmp((*Sym).data() + !!GlobalManglingPrefix, (*Sym).size() - !!GlobalManglingPrefix); - if (void *Addr = Dylib->getAddressOfSymbol(Tmp.c_str())) - R.back().push_back(pointerToJITTargetAddress(Addr)); - else if (KV.second == SymbolLookupFlags::RequiredSymbol) { + void *Addr = Dylib->getAddressOfSymbol(Tmp.c_str()); + if (!Addr && KV.second == SymbolLookupFlags::RequiredSymbol) { // FIXME: Collect all failing symbols before erroring out. SymbolNameVector MissingSymbols; MissingSymbols.push_back(Sym); return make_error(std::move(MissingSymbols)); } + R.back().push_back(pointerToJITTargetAddress(Addr)); } } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 369dc50895727..12286264c81df 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1208,15 +1208,11 @@ static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); } -static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, - bool IsSigned, bool IsAddition) { +static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI, + Intrinsic::ID IID) { Type *Ty = CI.getType(); Value *Op0 = CI.getOperand(0); Value *Op1 = CI.getOperand(1); - - Intrinsic::ID IID = - IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) - : (IsAddition ? 
Intrinsic::uadd_sat : Intrinsic::usub_sat); Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); @@ -1375,16 +1371,12 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, } static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { + Type *Ty = CI.getType(); Value *Op0 = CI.getArgOperand(0); - llvm::Type *Ty = Op0->getType(); - Value *Zero = llvm::Constant::getNullValue(Ty); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); - Value *Neg = Builder.CreateNeg(Op0); - Value *Res = Builder.CreateSelect(Cmp, Op0, Neg); - + Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty); + Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)}); if (CI.getNumArgOperands() == 3) - Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1)); - + Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1)); return Res; } @@ -2490,23 +2482,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); } else if (IsX86 && (Name.startswith("sse2.padds.") || - Name.startswith("sse2.psubs.") || Name.startswith("avx2.padds.") || - Name.startswith("avx2.psubs.") || Name.startswith("avx512.padds.") || + Name.startswith("avx512.mask.padds."))) { + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat); + } else if (IsX86 && (Name.startswith("sse2.psubs.") || + Name.startswith("avx2.psubs.") || Name.startswith("avx512.psubs.") || - Name.startswith("avx512.mask.padds.") || Name.startswith("avx512.mask.psubs."))) { - bool IsAdd = Name.contains(".padds"); - Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat); } else if (IsX86 && (Name.startswith("sse2.paddus.") || - Name.startswith("sse2.psubus.") || Name.startswith("avx2.paddus.") || + 
Name.startswith("avx512.mask.paddus."))) { + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat); + } else if (IsX86 && (Name.startswith("sse2.psubus.") || Name.startswith("avx2.psubus.") || - Name.startswith("avx512.mask.paddus.") || Name.startswith("avx512.mask.psubus."))) { - bool IsAdd = Name.contains(".paddus"); - Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat); } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), CI->getArgOperand(1), diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 89fc0d073749c..6cae21e3cfe1a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -589,7 +589,8 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { Assert(!GV.isDSOLocal(), "GlobalValue with DLLImport Storage is dso_local!", &GV); - Assert((GV.isDeclaration() && GV.hasExternalLinkage()) || + Assert((GV.isDeclaration() && + (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) || GV.hasAvailableExternallyLinkage(), "Global is marked as dllimport, but not external", &GV); } diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 4adc9a22a7b2d..14dae848b3624 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -276,7 +276,7 @@ static void computeGUIDPreservedSymbols(const lto::InputFile &File, // Iterate the symbols in the input file and if the input has preserved symbol // compute the GUID for the symbol. 
for (const auto &Sym : File.symbols()) { - if (PreservedSymbols.count(Sym.getName())) + if (PreservedSymbols.count(Sym.getName()) && !Sym.getIRName().empty()) GUIDs.insert(GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( Sym.getIRName(), GlobalValue::ExternalLinkage, ""))); } diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 055689b16e8f4..186ddb3d2b81b 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1126,14 +1126,13 @@ void IRLinker::prepareCompileUnitsForImport() { assert(CU && "Expected valid compile unit"); // Enums, macros, and retained types don't need to be listed on the // imported DICompileUnit. This means they will only be imported - // if reached from the mapped IR. Do this by setting their value map - // entries to nullptr, which will automatically prevent their importing - // when reached from the DICompileUnit during metadata mapping. - ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr); - ValueMap.MD()[CU->getRawMacros()].reset(nullptr); - ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr); + // if reached from the mapped IR. + CU->replaceEnumTypes(nullptr); + CU->replaceMacros(nullptr); + CU->replaceRetainedTypes(nullptr); + // The original definition (or at least its debug info - if the variable is - // internalized an optimized away) will remain in the source module, so + // internalized and optimized away) will remain in the source module, so // there's no need to import them. // If LLVM ever does more advanced optimizations on global variables // (removing/localizing write operations, for instance) that can track @@ -1141,7 +1140,7 @@ void IRLinker::prepareCompileUnitsForImport() { // with care when it comes to debug info size. Emitting small CUs containing // only a few imported entities into every destination module may be very // size inefficient. 
- ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr); + CU->replaceGlobalVariables(nullptr); // Imported entities only need to be mapped in if they have local // scope, as those might correspond to an imported entity inside a @@ -1174,7 +1173,7 @@ void IRLinker::prepareCompileUnitsForImport() { else // If there were no local scope imported entities, we can map // the whole list to nullptr. - ValueMap.MD()[CU->getRawImportedEntities()].reset(nullptr); + CU->replaceImportedEntities(nullptr); } } } diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 49d863f258bf9..5296e2400765f 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -433,15 +433,18 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO: case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI: case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA: case MCSymbolRefExpr::VK_PPC_TLS: + case MCSymbolRefExpr::VK_PPC_TLS_PCREL: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL: case MCSymbolRefExpr::VK_PPC_TLSGD: case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index fb7aaae295dfa..4d62174f7e5e4 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -127,7 +127,7 @@ struct StructInfo { std::vector Fields; StringMap FieldsByName; - FieldInfo &addField(StringRef FieldName, FieldType FT); + FieldInfo &addField(StringRef FieldName, FieldType FT, size_t FieldSize); StructInfo() = default; 
@@ -330,7 +330,8 @@ struct FieldInfo { FieldInfo(FieldType FT) : Contents(FT) {} }; -FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { +FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, + size_t FieldSize) { if (!FieldName.empty()) FieldsByName[FieldName] = Fields.size(); Fields.emplace_back(FT); @@ -338,7 +339,7 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { if (IsUnion) { Field.Offset = 0; } else { - Size = llvm::alignTo(Size, Alignment); + Size = llvm::alignTo(Size, std::min(Alignment, FieldSize)); Field.Offset = Size; } return Field; @@ -759,13 +760,14 @@ class MasmParser : public MCAsmParser { // "real4", "real8" bool emitRealValues(const fltSemantics &Semantics); - bool addRealField(StringRef Name, const fltSemantics &Semantics); - bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics); + bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); + bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, + size_t Size); bool parseRealInstList( const fltSemantics &Semantics, SmallVectorImpl &Values, const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); bool parseDirectiveNamedRealValue(StringRef IDVal, - const fltSemantics &Semantics, + const fltSemantics &Semantics, size_t Size, StringRef Name, SMLoc NameLoc); bool parseOptionalAngleBracketOpen(); @@ -1094,6 +1096,14 @@ const AsmToken &MasmParser::Lex() { tok = &Lexer.Lex(); } + // Recognize and bypass line continuations. + while (tok->is(AsmToken::BackSlash) && + Lexer.peekTok().is(AsmToken::EndOfStatement)) { + // Eat both the backslash and the end of statement. + Lexer.Lex(); + tok = &Lexer.Lex(); + } + if (tok->is(AsmToken::Eof)) { // If this is the end of an included file, pop the parent file off the // include stack. 
@@ -1306,7 +1316,7 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' -/// primaryexpr ::= ~,+,- primaryexpr +/// primaryexpr ::= ~,+,-,'not' primaryexpr bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); @@ -1344,6 +1354,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return Error(FirstTokenLoc, "invalid token in expression"); } } + // Parse named bitwise negation. + if (Identifier.equals_lower("not")) { + if (parsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); + return false; + } // Parse symbol variant. std::pair Split; if (!MAI.useParensForSymbolVariant()) { @@ -1764,8 +1781,18 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc) { SMLoc StartLoc = Lexer.getLoc(); while (true) { + AsmToken::TokenKind TokKind = Lexer.getKind(); + if (Lexer.getKind() == AsmToken::Identifier) { + StringRef Identifier = Lexer.getTok().getString(); + if (Identifier.equals_lower("and")) + TokKind = AsmToken::Amp; + else if (Identifier.equals_lower("not")) + TokKind = AsmToken::Exclaim; + else if (Identifier.equals_lower("or")) + TokKind = AsmToken::Pipe; + } MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + unsigned TokPrec = getBinOpPrecedence(TokKind, Kind); // If the next token is lower precedence than we are allowed to eat, return // successfully with what we ate already. 
@@ -2093,9 +2120,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_DQ: return parseDirectiveValue(IDVal, 8); case DK_REAL4: - return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4); case DK_REAL8: - return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8); case DK_STRUCT: case DK_UNION: return parseDirectiveNestedStruct(IDVal, DirKind); @@ -2318,12 +2345,12 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc); case DK_REAL4: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4, + IDVal, IDLoc); case DK_REAL8: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8, + IDVal, IDLoc); case DK_STRUCT: case DK_UNION: Lex(); @@ -3049,6 +3076,11 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, SMLoc EndLoc, StartLoc = Lexer.getLoc(); if (parseExpression(Expr, EndLoc)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); + Sym->setRedefinable(Var.Redefinable); + Sym->setVariableValue(Expr); + Sym->setExternal(false); + if (Expr->evaluateAsAbsolute(Var.NumericValue, getStreamer().getAssemblerPtr())) return false; @@ -3221,7 +3253,7 @@ bool MasmParser::parseScalarInitializer(unsigned Size, Lex(); } else { const MCExpr *Value; - if (checkForValidSection() || parseExpression(Value)) + if (parseExpression(Value)) return true; if (getTok().is(AsmToken::Identifier) && getTok().getString().equals_lower("dup")) { @@ -3281,7 +3313,7 @@ bool MasmParser::emitIntegralValues(unsigned Size) { // Add a field to the current structure. 
bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL); + FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size); IntFieldInfo &IntInfo = Field.Contents.IntInfo; Field.Type = Size; @@ -3441,6 +3473,9 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics, // Initialize real data values. bool MasmParser::emitRealValues(const fltSemantics &Semantics) { + if (checkForValidSection()) + return true; + SmallVector ValuesAsInt; if (parseRealInstList(Semantics, ValuesAsInt)) return true; @@ -3453,15 +3488,15 @@ bool MasmParser::emitRealValues(const fltSemantics &Semantics) { } // Add a real field to the current struct. -bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { +bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, + size_t Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_REAL); + FieldInfo &Field = Struct.addField(Name, FT_REAL, Size); RealFieldInfo &RealInfo = Field.Contents.RealInfo; Field.SizeOf = 0; - if (checkForValidSection() || - parseRealInstList(Semantics, RealInfo.AsIntValues)) + if (parseRealInstList(Semantics, RealInfo.AsIntValues)) return true; Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; @@ -3477,15 +3512,13 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { /// parseDirectiveRealValue /// ::= (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, - const fltSemantics &Semantics) { - if (checkForValidSection()) - return true; - + const fltSemantics &Semantics, + size_t Size) { if (StructInProgress.empty()) { // Initialize data value. 
if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField("", Semantics)) { + } else if (addRealField("", Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3495,17 +3528,15 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, /// ::= name (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, - StringRef Name, SMLoc NameLoc) { - if (checkForValidSection()) - return true; - + size_t Size, StringRef Name, + SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField(Name, Semantics)) { + } else if (addRealField(Name, Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3935,7 +3966,7 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) { // Declare a field in the current struct. 
bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { StructInfo &OwningStruct = StructInProgress.back(); - FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT); + FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; StructInfo.Structure = Structure; @@ -4109,7 +4140,8 @@ bool MasmParser::parseDirectiveNestedEnds() { else ParentStruct.Size += Structure.Size; } else { - FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); + FieldInfo &Field = + ParentStruct.addField(Structure.Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; Field.Type = Structure.Size; Field.LengthOf = 1; diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index d81687295bd04..fb0de40fc6d5f 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -280,6 +280,9 @@ ARM64CountOfUnwindCodes(const std::vector &Insns) { case Win64EH::UOP_AllocLarge: Count += 4; break; + case Win64EH::UOP_SaveR19R20X: + Count += 1; + break; case Win64EH::UOP_SaveFPLRX: Count += 1; break; @@ -298,6 +301,9 @@ ARM64CountOfUnwindCodes(const std::vector &Insns) { case Win64EH::UOP_SaveRegX: Count += 2; break; + case Win64EH::UOP_SaveLRPair: + Count += 2; + break; case Win64EH::UOP_SaveFReg: Count += 2; break; @@ -322,6 +328,21 @@ ARM64CountOfUnwindCodes(const std::vector &Insns) { case Win64EH::UOP_End: Count += 1; break; + case Win64EH::UOP_SaveNext: + Count += 1; + break; + case Win64EH::UOP_TrapFrame: + Count += 1; + break; + case Win64EH::UOP_PushMachFrame: + Count += 1; + break; + case Win64EH::UOP_Context: + Count += 1; + break; + case Win64EH::UOP_ClearUnwoundToCall: + Count += 1; + break; } } return Count; @@ -375,6 +396,11 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, b = 0xE3; streamer.emitInt8(b); break; + case Win64EH::UOP_SaveR19R20X: + b = 0x20; + b |= (inst.Offset >> 3) & 0x1F; + 
streamer.emitInt8(b); + break; case Win64EH::UOP_SaveFPLRX: b = 0x80; b |= ((inst.Offset - 1) >> 3) & 0x3F; @@ -417,6 +443,16 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1); streamer.emitInt8(b); break; + case Win64EH::UOP_SaveLRPair: + assert(inst.Register >= 19 && "Saved reg must be >= 19"); + reg = inst.Register - 19; + assert((reg % 2) == 0 && "Saved reg must be 19+2*X"); + reg /= 2; + b = 0xD6 | ((reg & 0x7) >> 2); + streamer.emitInt8(b); + b = ((reg & 0x3) << 6) | (inst.Offset >> 3); + streamer.emitInt8(b); + break; case Win64EH::UOP_SaveFReg: assert(inst.Register >= 8 && "Saved dreg must be >= 8"); reg = inst.Register - 8; @@ -453,6 +489,26 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, b = 0xE4; streamer.emitInt8(b); break; + case Win64EH::UOP_SaveNext: + b = 0xE6; + streamer.emitInt8(b); + break; + case Win64EH::UOP_TrapFrame: + b = 0xE8; + streamer.emitInt8(b); + break; + case Win64EH::UOP_PushMachFrame: + b = 0xE9; + streamer.emitInt8(b); + break; + case Win64EH::UOP_Context: + b = 0xEA; + streamer.emitInt8(b); + break; + case Win64EH::UOP_ClearUnwoundToCall: + b = 0xEC; + streamer.emitInt8(b); + break; } } diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp index 7ea5506f11d6a..11a24a6889f14 100644 --- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -196,15 +196,9 @@ void RegisterFile::addRegisterWrite(WriteRef Write, // Update zero registers. MCPhysReg ZeroRegisterID = WS.clearsSuperRegisters() ? 
RegID : WS.getRegisterID(); - if (IsWriteZero) { - ZeroRegisters.setBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.setBit(*I); - } else { - ZeroRegisters.clearBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.clearBit(*I); - } + ZeroRegisters.setBitVal(ZeroRegisterID, IsWriteZero); + for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) + ZeroRegisters.setBitVal(*I, IsWriteZero); // If this is move has been eliminated, then the call to tryEliminateMove // should have already updated all the register mappings. @@ -233,10 +227,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, RegisterMappings[*I].second.AliasRegID = 0U; } - if (IsWriteZero) - ZeroRegisters.setBit(*I); - else - ZeroRegisters.clearBit(*I); + ZeroRegisters.setBitVal(*I, IsWriteZero); } } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 0e06af92d64bc..c6e9ee175adc8 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -152,6 +152,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } break; + case ELF::EM_CSKY: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/CSKY.def" + default: + break; + } + break; default: break; } @@ -194,6 +201,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { case ELF::EM_SPARC32PLUS: case ELF::EM_SPARCV9: return ELF::R_SPARC_RELATIVE; + case ELF::EM_CSKY: + return ELF::R_CKCORE_RELATIVE; case ELF::EM_AMDGPU: break; case ELF::EM_BPF: diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index a0a445ae0c9db..bf29f40579ceb 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -594,7 +594,7 @@ Error DWARFYAML::emitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) { } Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) { - for (const AddrTableEntry &TableEntry : 
DI.DebugAddr) { + for (const AddrTableEntry &TableEntry : *DI.DebugAddr) { uint8_t AddrSize; if (TableEntry.AddrSize) AddrSize = *TableEntry.AddrSize; diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 046dddbf9a397..353e5058a0e5d 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -32,7 +32,7 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SecNames.insert("debug_ranges"); if (!DebugLines.empty()) SecNames.insert("debug_line"); - if (!DebugAddr.empty()) + if (DebugAddr) SecNames.insert("debug_addr"); if (!DebugAbbrev.empty()) SecNames.insert("debug_abbrev"); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 19f909e565644..2ebea1176a6f0 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -230,6 +230,7 @@ void ScalarEnumerationTraits::enumeration( ECase(EM_LANAI); ECase(EM_BPF); ECase(EM_VE); + ECase(EM_CSKY); #undef ECase IO.enumFallback(Value); } @@ -674,6 +675,9 @@ void ScalarEnumerationTraits::enumeration( case ELF::EM_VE: #include "llvm/BinaryFormat/ELFRelocs/VE.def" break; + case ELF::EM_CSKY: +#include "llvm/BinaryFormat/ELFRelocs/CSKY.def" + break; case ELF::EM_PPC64: #include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" break; diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index ecae386fd1ba9..9eba391032b98 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -285,16 +285,20 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) { return createStringError( errc::invalid_argument, "wrote too much data somewhere, section offsets don't line up"); - if (0 == strncmp(&Sec.segname[0], "__DWARF", sizeof(Sec.segname))) { - StringRef SectName(Sec.sectname, - strnlen(Sec.sectname, sizeof(Sec.sectname))); - if (Obj.DWARF.getNonEmptySectionNames().count(SectName.substr(2))) { - auto EmitFunc = - 
DWARFYAML::getDWARFEmitterByName(SectName.substr(2)); - if (Error Err = EmitFunc(OS, Obj.DWARF)) - return Err; - } + StringRef SectName(Sec.sectname, + strnlen(Sec.sectname, sizeof(Sec.sectname))); + // If the section's content is specified in the 'DWARF' entry, we will + // emit it regardless of the section's segname. + if (Obj.DWARF.getNonEmptySectionNames().count(SectName.substr(2))) { + if (Sec.content) + return createStringError(errc::invalid_argument, + "cannot specify section '" + SectName + + "' contents in the 'DWARF' entry and " + "the 'content' at the same time"); + auto EmitFunc = DWARFYAML::getDWARFEmitterByName(SectName.substr(2)); + if (Error Err = EmitFunc(OS, Obj.DWARF)) + return Err; continue; } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3a6b736dae3cf..9df6a985789ea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -39,6 +39,7 @@ #include "llvm/Analysis/InstCount.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Lint.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 12e04ad91128d..b0d1d2a63a830 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -205,6 +205,7 @@ FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("float2int", Float2IntPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) +FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 9a6f93feaa29f..fc339de45af43 100644 --- a/llvm/lib/Support/APInt.cpp +++ 
b/llvm/lib/Support/APInt.cpp @@ -338,8 +338,7 @@ void APInt::flipAllBitsSlowCase() { /// Toggles a given bit to its opposite value. void APInt::flipBit(unsigned bitPosition) { assert(bitPosition < BitWidth && "Out of the bit-width range!"); - if ((*this)[bitPosition]) clearBit(bitPosition); - else setBit(bitPosition); + setBitVal(bitPosition, !(*this)[bitPosition]); } void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { @@ -393,12 +392,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // General case - set/clear individual bits in dst based on src. // TODO - there is scope for optimization here, but at the moment this code // path is barely used so prefer readability over performance. - for (unsigned i = 0; i != subBitWidth; ++i) { - if (subBits[i]) - setBit(bitPosition + i); - else - clearBit(bitPosition + i); - } + for (unsigned i = 0; i != subBitWidth; ++i) + setBitVal(bitPosition + i, subBits[i]); } void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp index e962657730fe5..23b9f962422e8 100644 --- a/llvm/lib/Support/ErrorHandling.cpp +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -168,9 +168,11 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { #else // Don't call the normal error handler. It may allocate memory. Directly write // an OOM to stderr and abort. 
- char OOMMessage[] = "LLVM ERROR: out of memory\n"; - ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage)); - (void)written; + const char *OOMMessage = "LLVM ERROR: out of memory\n"; + const char *Newline = "\n"; + (void)::write(2, OOMMessage, strlen(OOMMessage)); + (void)::write(2, Reason, strlen(Reason)); + (void)::write(2, Newline, strlen(Newline)); abort(); #endif } diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 1ff66d504cbea..aad50e1240341 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -83,6 +83,68 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW, return KnownOut; } +KnownBits KnownBits::makeGE(const APInt &Val) const { + // Count the number of leading bit positions where our underlying value is + // known to be less than or equal to Val. + unsigned N = (Zero | Val).countLeadingOnes(); + + // For each of those bit positions, if Val has a 1 in that bit then our + // underlying value must also have a 1. + APInt MaskedVal(Val); + MaskedVal.clearLowBits(getBitWidth() - N); + return KnownBits(Zero, One | MaskedVal); +} + +KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { + // If we can prove that LHS >= RHS then use LHS as the result. Likewise for + // RHS. Ideally our caller would already have spotted these cases and + // optimized away the umax operation, but we handle them here for + // completeness. + if (LHS.getMinValue().uge(RHS.getMaxValue())) + return LHS; + if (RHS.getMinValue().uge(LHS.getMaxValue())) + return RHS; + + // If the result of the umax is LHS then it must be greater than or equal to + // the minimum possible value of RHS. Likewise for RHS. Any known bits that + // are common to these two values are also known in the result. 
+ KnownBits L = LHS.makeGE(RHS.getMinValue()); + KnownBits R = RHS.makeGE(LHS.getMinValue()); + return KnownBits(L.Zero & R.Zero, L.One & R.One); +} + +KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { + // Flip the range of values: [0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0] + auto Flip = [](KnownBits Val) { return KnownBits(Val.One, Val.Zero); }; + return Flip(umax(Flip(LHS), Flip(RHS))); +} + +KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { + // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF] + auto Flip = [](KnownBits Val) { + unsigned SignBitPosition = Val.getBitWidth() - 1; + APInt Zero = Val.Zero; + APInt One = Val.One; + Zero.setBitVal(SignBitPosition, Val.One[SignBitPosition]); + One.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]); + return KnownBits(Zero, One); + }; + return Flip(umax(Flip(LHS), Flip(RHS))); +} + +KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { + // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0] + auto Flip = [](KnownBits Val) { + unsigned SignBitPosition = Val.getBitWidth() - 1; + APInt Zero = Val.One; + APInt One = Val.Zero; + Zero.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]); + One.setBitVal(SignBitPosition, Val.One[SignBitPosition]); + return KnownBits(Zero, One); + }; + return Flip(umax(Flip(LHS), Flip(RHS))); +} + KnownBits &KnownBits::operator&=(const KnownBits &RHS) { // Result bit is 0 if either operand bit is 0. Zero |= RHS.Zero; diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index 6d5fe7165f633..debde5cdad5b6 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -12,6 +12,9 @@ #include "llvm/ADT/SmallVector.h" #include +#ifdef LLVM_ENABLE_EXCEPTIONS +#include +#endif using namespace llvm; // Check that no bytes are wasted and everything is well-aligned. 
@@ -42,26 +45,50 @@ static_assert(sizeof(SmallVector) == sizeof(void *) * 2 + sizeof(void *), "1 byte elements have word-sized type for size and capacity"); +template +void SmallVectorBase::report_size_overflow(size_t MinSize) { + std::string Reason = "SmallVector unable to grow. Requested capacity (" + + std::to_string(MinSize) + + ") is larger than maximum value for size type (" + + std::to_string(SizeTypeMax()) + ")"; +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif +} + +template void SmallVectorBase::report_at_maximum_capacity() { + std::string Reason = + "SmallVector capacity unable to grow. Already at maximum size " + + std::to_string(SizeTypeMax()); +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif +} + // Note: Moving this function into the header may cause performance regression. template -void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity, +void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSize, size_t TSize) { // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinCapacity > SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity overflow during allocation"); + if (MinSize > SizeTypeMax()) + report_size_overflow(MinSize); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a - // default MinCapacity of 0, but the current capacity cannot be increased. + // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. 
if (capacity() == SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity unable to grow"); + report_at_maximum_capacity(); // In theory 2*capacity can overflow if the capacity is 64 bit, but the // original capacity would never be large enough for this to be a problem. size_t NewCapacity = 2 * capacity() + 1; // Always grow. - NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax()); + NewCapacity = std::min(std::max(NewCapacity, MinSize), SizeTypeMax()); void *NewElts; if (BeginX == FirstEl) { diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 5b757c9ea80db..bbde44c30caaa 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -1159,6 +1159,17 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { return ExternalContentsPrefixDir; } +void RedirectingFileSystem::setFallthrough(bool Fallthrough) { + IsFallthrough = Fallthrough; +} + +std::vector RedirectingFileSystem::getRoots() const { + std::vector R; + for (const auto &Root : Roots) + R.push_back(Root->getName()); + return R; +} + void RedirectingFileSystem::dump(raw_ostream &OS) const { for (const auto &Root : Roots) dumpEntry(OS, Root.get()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e1b79393f25f2..063644716a654 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -139,6 +139,12 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: case AArch64ISD::FCEIL_MERGE_PASSTHRU: + case AArch64ISD::FFLOOR_MERGE_PASSTHRU: + case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU: + case AArch64ISD::FRINT_MERGE_PASSTHRU: + case AArch64ISD::FROUND_MERGE_PASSTHRU: + case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: + case AArch64ISD::FTRUNC_MERGE_PASSTHRU: return true; } } 
@@ -976,6 +982,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::FROUNDEVEN, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); } } @@ -1482,6 +1494,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) MAKE_CASE(AArch64ISD::SBC) @@ -3346,6 +3364,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_frintp: return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintm: + return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frinti: + return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintx: + return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frinta: + return 
DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintn: + return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintz: + return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_convert_to_svbool: { EVT OutVT = Op.getValueType(); EVT InVT = Op.getOperand(1).getValueType(); @@ -3645,6 +3681,18 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU); case ISD::FCEIL: return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU); + case ISD::FFLOOR: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU); + case ISD::FNEARBYINT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU); + case ISD::FRINT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU); + case ISD::FROUND: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU); + case ISD::FROUNDEVEN: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU); + case ISD::FTRUNC: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU); case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 38caa6a481418..d6e511891752a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -96,7 +96,13 @@ enum NodeType : unsigned { // Predicated instructions with the result of inactive lanes provided by the // last operand. 
FCEIL_MERGE_PASSTHRU, + FFLOOR_MERGE_PASSTHRU, + FNEARBYINT_MERGE_PASSTHRU, FNEG_MERGE_PASSTHRU, + FRINT_MERGE_PASSTHRU, + FROUND_MERGE_PASSTHRU, + FROUNDEVEN_MERGE_PASSTHRU, + FTRUNC_MERGE_PASSTHRU, SIGN_EXTEND_INREG_MERGE_PASSTHRU, ZERO_EXTEND_INREG_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index eadf23dc46225..e01a34242a8d7 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -203,6 +203,12 @@ def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith> def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; @@ -1416,13 +1422,13 @@ multiclass sve_prefetch; defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>; - defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64frintp_mt>; - defm FRINTM_ZPmZ : 
sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>; - defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>; - defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>; - defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>; - defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>; + defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", null_frag, AArch64frintn_mt>; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", null_frag, AArch64frintp_mt>; + defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", null_frag, AArch64frintm_mt>; + defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", null_frag, AArch64frintz_mt>; + defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", null_frag, AArch64frinta_mt>; + defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", null_frag, AArch64frintx_mt>; + defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", null_frag, AArch64frinti_mt>; defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>; defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index a74d15de25566..08a29bbb3e87a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -186,12 +186,14 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseDirectiveSEHAllocStack(SMLoc L); bool parseDirectiveSEHPrologEnd(SMLoc L); + bool parseDirectiveSEHSaveR19R20X(SMLoc L); bool parseDirectiveSEHSaveFPLR(SMLoc L); bool parseDirectiveSEHSaveFPLRX(SMLoc L); bool parseDirectiveSEHSaveReg(SMLoc L); bool parseDirectiveSEHSaveRegX(SMLoc L); bool parseDirectiveSEHSaveRegP(SMLoc L); bool parseDirectiveSEHSaveRegPX(SMLoc L); + bool parseDirectiveSEHSaveLRPair(SMLoc L); bool 
parseDirectiveSEHSaveFReg(SMLoc L); bool parseDirectiveSEHSaveFRegX(SMLoc L); bool parseDirectiveSEHSaveFRegP(SMLoc L); @@ -199,8 +201,13 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseDirectiveSEHSetFP(SMLoc L); bool parseDirectiveSEHAddFP(SMLoc L); bool parseDirectiveSEHNop(SMLoc L); + bool parseDirectiveSEHSaveNext(SMLoc L); bool parseDirectiveSEHEpilogStart(SMLoc L); bool parseDirectiveSEHEpilogEnd(SMLoc L); + bool parseDirectiveSEHTrapFrame(SMLoc L); + bool parseDirectiveSEHMachineFrame(SMLoc L); + bool parseDirectiveSEHContext(SMLoc L); + bool parseDirectiveSEHClearUnwoundToCall(SMLoc L); bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, SmallVectorImpl &Loc); @@ -5174,6 +5181,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveSEHAllocStack(Loc); else if (IDVal == ".seh_endprologue") parseDirectiveSEHPrologEnd(Loc); + else if (IDVal == ".seh_save_r19r20_x") + parseDirectiveSEHSaveR19R20X(Loc); else if (IDVal == ".seh_save_fplr") parseDirectiveSEHSaveFPLR(Loc); else if (IDVal == ".seh_save_fplr_x") @@ -5186,6 +5195,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveSEHSaveRegP(Loc); else if (IDVal == ".seh_save_regp_x") parseDirectiveSEHSaveRegPX(Loc); + else if (IDVal == ".seh_save_lrpair") + parseDirectiveSEHSaveLRPair(Loc); else if (IDVal == ".seh_save_freg") parseDirectiveSEHSaveFReg(Loc); else if (IDVal == ".seh_save_freg_x") @@ -5200,10 +5211,20 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveSEHAddFP(Loc); else if (IDVal == ".seh_nop") parseDirectiveSEHNop(Loc); + else if (IDVal == ".seh_save_next") + parseDirectiveSEHSaveNext(Loc); else if (IDVal == ".seh_startepilogue") parseDirectiveSEHEpilogStart(Loc); else if (IDVal == ".seh_endepilogue") parseDirectiveSEHEpilogEnd(Loc); + else if (IDVal == ".seh_trap_frame") + parseDirectiveSEHTrapFrame(Loc); + else if (IDVal == ".seh_pushframe") + parseDirectiveSEHMachineFrame(Loc); + else if (IDVal == 
".seh_context") + parseDirectiveSEHContext(Loc); + else if (IDVal == ".seh_clear_unwound_to_call") + parseDirectiveSEHClearUnwoundToCall(Loc); else return true; } else @@ -5645,6 +5666,16 @@ bool AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) { return false; } +/// parseDirectiveSEHSaveR19R20X +/// ::= .seh_save_r19r20_x +bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) { + int64_t Offset; + if (parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset); + return false; +} + /// parseDirectiveSEHSaveFPLR /// ::= .seh_save_fplr bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) { @@ -5713,6 +5744,22 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) { return false; } +/// parseDirectiveSEHSaveLRPair +/// ::= .seh_save_lrpair +bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) { + unsigned Reg; + int64_t Offset; + L = getLoc(); + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + parseComma() || parseImmExpr(Offset)) + return true; + if (check(((Reg - 19) % 2 != 0), L, + "expected register with even offset from x19")) + return true; + getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset); + return false; +} + /// parseDirectiveSEHSaveFReg /// ::= .seh_save_freg bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) { @@ -5785,6 +5832,13 @@ bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) { return false; } +/// parseDirectiveSEHSaveNext +/// ::= .seh_save_next +bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) { + getTargetStreamer().EmitARM64WinCFISaveNext(); + return false; +} + /// parseDirectiveSEHEpilogStart /// ::= .seh_startepilogue bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) { @@ -5799,6 +5853,34 @@ bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) { return false; } +/// parseDirectiveSEHTrapFrame +/// ::= .seh_trap_frame +bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) { + 
getTargetStreamer().EmitARM64WinCFITrapFrame(); + return false; +} + +/// parseDirectiveSEHMachineFrame +/// ::= .seh_pushframe +bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIMachineFrame(); + return false; +} + +/// parseDirectiveSEHContext +/// ::= .seh_context +bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIContext(); + return false; +} + +/// parseDirectiveSEHClearUnwoundToCall +/// ::= .seh_clear_unwound_to_call +bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall(); + return false; +} + bool AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, AArch64MCExpr::VariantKind &ELFRefKind, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index ce40e96814670..3e0e9ba9f5f7f 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -50,6 +50,9 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer { void EmitARM64WinCFIAllocStack(unsigned Size) override { OS << "\t.seh_stackalloc " << Size << "\n"; } + void EmitARM64WinCFISaveR19R20X(int Offset) override { + OS << "\t.seh_save_r19r20_x " << Offset << "\n"; + } void EmitARM64WinCFISaveFPLR(int Offset) override { OS << "\t.seh_save_fplr " << Offset << "\n"; } @@ -68,6 +71,9 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer { void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override { OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n"; } + void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override { + OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n"; + } void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override { OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n"; } @@ -85,9 +91,16 @@ class 
AArch64TargetAsmStreamer : public AArch64TargetStreamer { OS << "\t.seh_add_fp " << Size << "\n"; } void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; } + void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; } void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; } void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; } void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; } + void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; } + void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; } + void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; } + void EmitARM64WinCFIClearUnwoundToCall() override { + OS << "\t.seh_clear_unwound_to_call\n"; + } public: AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index 3a0c5d8318dd5..c0dee085caced 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -37,12 +37,14 @@ class AArch64TargetStreamer : public MCTargetStreamer { virtual void emitInst(uint32_t Inst); virtual void EmitARM64WinCFIAllocStack(unsigned Size) {} + virtual void EmitARM64WinCFISaveR19R20X(int Offset) {} virtual void EmitARM64WinCFISaveFPLR(int Offset) {} virtual void EmitARM64WinCFISaveFPLRX(int Offset) {} virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {} + virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {} virtual void 
EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {} @@ -50,9 +52,14 @@ class AArch64TargetStreamer : public MCTargetStreamer { virtual void EmitARM64WinCFISetFP() {} virtual void EmitARM64WinCFIAddFP(unsigned Size) {} virtual void EmitARM64WinCFINop() {} + virtual void EmitARM64WinCFISaveNext() {} virtual void EmitARM64WinCFIPrologEnd() {} virtual void EmitARM64WinCFIEpilogStart() {} virtual void EmitARM64WinCFIEpilogEnd() {} + virtual void EmitARM64WinCFITrapFrame() {} + virtual void EmitARM64WinCFIMachineFrame() {} + virtual void EmitARM64WinCFIContext() {} + virtual void EmitARM64WinCFIClearUnwoundToCall() {} private: std::unique_ptr ConstantPools; @@ -82,12 +89,14 @@ class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { // The unwind codes on ARM64 Windows are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling void EmitARM64WinCFIAllocStack(unsigned Size) override; + void EmitARM64WinCFISaveR19R20X(int Offset) override; void EmitARM64WinCFISaveFPLR(int Offset) override; void EmitARM64WinCFISaveFPLRX(int Offset) override; void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override; + void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override; @@ -95,9 +104,15 @@ class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { void EmitARM64WinCFISetFP() override; void EmitARM64WinCFIAddFP(unsigned Size) override; void EmitARM64WinCFINop() override; + void EmitARM64WinCFISaveNext() override; void EmitARM64WinCFIPrologEnd() override; void EmitARM64WinCFIEpilogStart() override; void 
EmitARM64WinCFIEpilogEnd() override; + void EmitARM64WinCFITrapFrame() override; + void EmitARM64WinCFIMachineFrame() override; + void EmitARM64WinCFIContext() override; + void EmitARM64WinCFIClearUnwoundToCall() override; + private: void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset); }; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 03fbab5142a2e..a07416420fe9e 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -85,6 +85,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) { EmitARM64WinUnwindCode(Op, -1, Size); } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset); +} + void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) { EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset); } @@ -115,6 +119,11 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg, EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset); } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg, + int Offset) { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset); +} + void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) { assert(Offset >= 0 && Offset <= 504 && @@ -150,6 +159,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() { EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0); } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0); +} + // The functions below handle opcodes that can end up in either a prolog or // an epilog, but not both. 
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() { @@ -188,6 +201,22 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() { CurrentEpilog = nullptr; } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() { + EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() { + EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() { + EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() { + EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0); +} + MCWinCOFFStreamer *createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, std::unique_ptr Emitter, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index ee6d35ddddf8a..07b4992bbf571 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -44,6 +44,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/IntegerDivision.h" #include #include diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 151b1bdd55381..5dd42d1f4a6a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -521,8 +521,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { return true; // TODO: Move into isKnownNeverNaN - if (N->getFlags().isDefined()) - return N->getFlags().hasNoNaNs(); + if (N->getFlags().hasNoNaNs()) + return true; return CurDAG->isKnownNeverNaN(N); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index c7fdc79c3b1a0..932a05a4ba8c7 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -150,8 +150,8 @@ class AMDGPUTargetLowering : public TargetLowering { return true; const auto Flags = Op.getNode()->getFlags(); - if (Flags.isDefined()) - return Flags.hasNoSignedZeros(); + if (Flags.hasNoSignedZeros()) + return true; return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f882bfb568ee2..fb954e6bbba1c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2833875e438cd..db74f8a54c0af 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1340,7 +1340,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser { const int64_t Width, const SMLoc Loc); - void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; @@ -4705,22 +4704,18 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, if (getLexer().is(AsmToken::Comma)) Parser.Lex(); - switch (Res) { - case MatchOperand_Success: break; - case MatchOperand_ParseFail: + if (Res != MatchOperand_Success) { + if (!Parser.hasPendingError()) { // FIXME: use real operand location rather than the current location. 
- Error(getLexer().getLoc(), "failed parsing operand."); - while (!getLexer().is(AsmToken::EndOfStatement)) { - Parser.Lex(); - } - return true; - case MatchOperand_NoMatch: - // FIXME: use real operand location rather than the current location. - Error(getLexer().getLoc(), "not a valid operand."); - while (!getLexer().is(AsmToken::EndOfStatement)) { - Parser.Lex(); - } - return true; + StringRef Msg = + (Res == MatchOperand_ParseFail) ? "failed parsing operand." : + "not a valid operand."; + Error(getLexer().getLoc(), Msg); + } + while (!getLexer().is(AsmToken::EndOfStatement)) { + Parser.Lex(); + } + return true; } } @@ -5004,8 +4999,10 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, } if (Dfmt == DFMT_UNDEF) { Error(Loc, "duplicate numeric format"); - } else if (Nfmt == NFMT_UNDEF){ + return MatchOperand_ParseFail; + } else if (Nfmt == NFMT_UNDEF) { Error(Loc, "duplicate data format"); + return MatchOperand_ParseFail; } } @@ -5014,8 +5011,10 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, if (isGFX10()) { auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); - if (Ufmt == UFMT_UNDEF) + if (Ufmt == UFMT_UNDEF) { Error(FormatLoc, "unsupported format"); + return MatchOperand_ParseFail; + } Format = Ufmt; } else { Format = encodeDfmtNfmt(Dfmt, Nfmt); @@ -5077,7 +5076,9 @@ AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { if (Res != MatchOperand_Success) return Res; - skipToken(AsmToken::RBrac, "expected a closing square bracket"); + if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) + return MatchOperand_ParseFail; + return MatchOperand_Success; } @@ -5119,7 +5120,10 @@ AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { trySkipToken(AsmToken::Comma); if (!FormatFound) { - if (parseSymbolicOrNumericFormat(Format) == MatchOperand_Success) { + Res = parseSymbolicOrNumericFormat(Format); + if (Res == MatchOperand_ParseFail) + return Res; + if (Res == MatchOperand_Success) { auto Size = Operands.size(); 
AMDGPUOperand &Op = static_cast(*Operands[Size - 2]); assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); @@ -5340,12 +5344,14 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = getLoc(); - // If parse failed, do not return error code - // to avoid excessive error messages. if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { - while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); + while (!isToken(AsmToken::EndOfStatement)) { + if (!parseCnt(Waitcnt)) + return MatchOperand_ParseFail; + } } else { - parseExpr(Waitcnt); + if (!parseExpr(Waitcnt)) + return MatchOperand_ParseFail; } Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); @@ -5419,8 +5425,6 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { int64_t ImmVal = 0; SMLoc Loc = getLoc(); - // If parse failed, do not return error code - // to avoid excessive error messages. if (trySkipId("hwreg", AsmToken::LParen)) { OperandInfoTy HwReg(ID_UNKNOWN_); int64_t Offset = OFFSET_DEFAULT_; @@ -5428,10 +5432,16 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { if (parseHwregBody(HwReg, Offset, Width) && validateHwreg(HwReg, Offset, Width, Loc)) { ImmVal = encodeHwreg(HwReg.Id, Offset, Width); + } else { + return MatchOperand_ParseFail; } } else if (parseExpr(ImmVal)) { - if (ImmVal < 0 || !isUInt<16>(ImmVal)) + if (ImmVal < 0 || !isUInt<16>(ImmVal)) { Error(Loc, "invalid immediate: only 16-bit values are legal"); + return MatchOperand_ParseFail; + } + } else { + return MatchOperand_ParseFail; } Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); @@ -5518,8 +5528,6 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { int64_t ImmVal = 0; SMLoc Loc = getLoc(); - // If parse failed, do not return error code - // to avoid excessive error messages. 
if (trySkipId("sendmsg", AsmToken::LParen)) { OperandInfoTy Msg(ID_UNKNOWN_); OperandInfoTy Op(OP_NONE_); @@ -5527,10 +5535,16 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { if (parseSendMsgBody(Msg, Op, Stream) && validateSendMsg(Msg, Op, Stream, Loc)) { ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); + } else { + return MatchOperand_ParseFail; } } else if (parseExpr(ImmVal)) { - if (ImmVal < 0 || !isUInt<16>(ImmVal)) + if (ImmVal < 0 || !isUInt<16>(ImmVal)) { Error(Loc, "invalid immediate: only 16-bit values are legal"); + return MatchOperand_ParseFail; + } + } else { + return MatchOperand_ParseFail; } Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); @@ -5594,7 +5608,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { Parser.Lex(); if (Attr > 63) { Error(S, "out of bounds attr"); - return MatchOperand_Success; + return MatchOperand_ParseFail; } SMLoc SChan = SMLoc::getFromPointer(Chan.data()); @@ -5610,10 +5624,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { // exp //===----------------------------------------------------------------------===// -void AMDGPUAsmParser::errorExpTgt() { - Error(Parser.getTok().getLoc(), "invalid exp target"); -} - OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, uint8_t &Val) { if (Str == "null") { @@ -5631,8 +5641,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - if (Val > 7) - errorExpTgt(); + if (Val > 7) { + Error(getLoc(), "invalid exp target"); + return MatchOperand_ParseFail; + } return MatchOperand_Success; } @@ -5642,8 +5654,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - if (Val > 4 || (Val == 4 && !isGFX10())) - errorExpTgt(); + if (Val > 4 || (Val == 4 && !isGFX10())) { + Error(getLoc(), "invalid 
exp target"); + return MatchOperand_ParseFail; + } Val += 12; return MatchOperand_Success; @@ -5659,8 +5673,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - if (Val >= 32) - errorExpTgt(); + if (Val >= 32) { + Error(getLoc(), "invalid exp target"); + return MatchOperand_ParseFail; + } Val += 32; return MatchOperand_Success; @@ -5671,8 +5687,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - errorExpTgt(); - return MatchOperand_Success; + Error(getLoc(), "invalid exp target"); + return MatchOperand_ParseFail; } return MatchOperand_NoMatch; @@ -6107,12 +6123,12 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() { Error(S, (Imm == 0)? "expected a VGPR index mode or a closing parenthesis" : "expected a VGPR index mode"); - break; + return UNDEF; } if (Imm & Mode) { Error(S, "duplicate VGPR index mode"); - break; + return UNDEF; } Imm |= Mode; @@ -6120,7 +6136,7 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() { break; if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) - break; + return UNDEF; } return Imm; @@ -6129,6 +6145,8 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() { OperandMatchResultTy AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { + using namespace llvm::AMDGPU::VGPRIndexMode; + int64_t Imm = 0; SMLoc S = Parser.getTok().getLoc(); @@ -6139,15 +6157,16 @@ AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { Parser.Lex(); Parser.Lex(); - // If parse failed, trigger an error but do not return error code - // to avoid excessive error messages. 
Imm = parseGPRIdxMacro(); + if (Imm == UNDEF) + return MatchOperand_ParseFail; } else { if (getParser().parseAbsoluteExpression(Imm)) - return MatchOperand_NoMatch; + return MatchOperand_ParseFail; if (Imm < 0 || !isUInt<4>(Imm)) { Error(S, "invalid immediate: only 4-bit values are legal"); + return MatchOperand_ParseFail; } } @@ -6173,22 +6192,22 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { if (isRegister() || isModifier()) return MatchOperand_NoMatch; - if (parseExpr(Operands)) { + if (!parseExpr(Operands)) + return MatchOperand_ParseFail; - AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); - assert(Opr.isImm() || Opr.isExpr()); - SMLoc Loc = Opr.getStartLoc(); + AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); + assert(Opr.isImm() || Opr.isExpr()); + SMLoc Loc = Opr.getStartLoc(); - // Currently we do not support arbitrary expressions as branch targets. - // Only labels and absolute expressions are accepted. - if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { - Error(Loc, "expected an absolute expression or a label"); - } else if (Opr.isImm() && !Opr.isS16Imm()) { - Error(Loc, "expected a 16-bit signed jump offset"); - } + // Currently we do not support arbitrary expressions as branch targets. + // Only labels and absolute expressions are accepted. 
+ if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { + Error(Loc, "expected an absolute expression or a label"); + } else if (Opr.isImm() && !Opr.isS16Imm()) { + Error(Loc, "expected a 16-bit signed jump offset"); } - return MatchOperand_Success; // avoid excessive error messages + return MatchOperand_Success; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 58d77f3b224b5..d6013baf0f36e 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -217,7 +217,8 @@ enum EncBits : unsigned { SRC1_ENABLE = 1 << ID_SRC1, SRC2_ENABLE = 1 << ID_SRC2, DST_ENABLE = 1 << ID_DST, - ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE + ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE, + UNDEF = 0xFFFF }; } // namespace VGPRIndexMode diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index ab89257a57168..9a30d4fd6bd4a 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -282,9 +282,6 @@ static bool updateOperand(FoldCandidate &Fold, assert(!Fold.needsShrink() && "not handled"); if (Fold.isImm()) { - // FIXME: ChangeToImmediate should probably clear the subreg flags. It's - // reinterpreted as TargetFlags. 
- Old.setSubReg(0); Old.ChangeToImmediate(Fold.ImmToFold); return true; } @@ -834,8 +831,6 @@ void SIFoldOperands::foldOperand( UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32)); - // FIXME: ChangeToImmediate should clear subreg - UseMI->getOperand(1).setSubReg(0); if (OpToFold.isImm()) UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm()); else diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 9b795b22f5234..c947995fd3ee4 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -112,15 +112,19 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, // 3: There's no free lane to spill, and no free register to save FP/BP, // so we're forced to spill another VGPR to use for the spill. FrameIndex = NewFI; + + LLVM_DEBUG( + auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); + dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " + << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); } else { + // Remove dead index + MF.getFrameInfo().RemoveStackObject(NewFI); // 4: If all else fails, spill the FP/BP to memory. FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); + LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " + << (IsFP ? "FP" : "BP") << '\n'); } - - LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); - dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " - << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane - << '\n';); } else { LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? 
"FP" : "BP") << " with copy to " << printReg(TempSGPR, TRI) << '\n'); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c04f569cebdee..ad9c4d0673476 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -34,8 +34,9 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" -#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 74f8864640691..9aa28cff10868 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2656,7 +2656,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.setDesc(get(NewOpc)); UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); - UseMI.getOperand(1).setTargetFlags(0); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); return true; } diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8488e86fbc297..914668f2b68a2 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -113,6 +113,8 @@ class SILowerControlFlow : public MachineFunctionPass { void combineMasks(MachineInstr &MI); + bool removeMBBifRedundant(MachineBasicBlock &MBB); + void process(MachineInstr &MI); // Skip to the next instruction, ignoring debug instructions, and trivial @@ -154,9 +156,6 @@ class SILowerControlFlow : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); - AU.addPreservedID(MachineLoopInfoID); - 
AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -335,21 +334,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); - // We are running before TwoAddressInstructions, and si_else's operands are - // tied. In order to correctly tie the registers, split this into a copy of - // the src like it does. - Register CopyReg = MRI->createVirtualRegister(BoolRC); - MachineInstr *CopyExec = - BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg) - .add(MI.getOperand(1)); // Saved EXEC - // This must be inserted before phis and any spill code inserted before the // else. Register SaveReg = ExecModified ? MRI->createVirtualRegister(BoolRC) : DstReg; MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg) - .addReg(CopyReg); + .add(MI.getOperand(1)); // Saved EXEC MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); @@ -386,16 +377,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); - LIS->InsertMachineInstrInMaps(*CopyExec); LIS->InsertMachineInstrInMaps(*OrSaveExec); LIS->InsertMachineInstrInMaps(*Xor); LIS->InsertMachineInstrInMaps(*Branch); - // src reg is tied to dst reg. 
LIS->removeInterval(DstReg); LIS->createAndComputeVirtRegInterval(DstReg); - LIS->createAndComputeVirtRegInterval(CopyReg); if (ExecModified) LIS->createAndComputeVirtRegInterval(SaveReg); @@ -615,6 +603,7 @@ void SILowerControlFlow::optimizeEndCf() { if (LIS) LIS->RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); + removeMBBifRedundant(MBB); } } } @@ -669,6 +658,47 @@ void SILowerControlFlow::process(MachineInstr &MI) { } } +bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { + bool Redundant = true; + for (auto &I : MBB.instrs()) { + if (!I.isDebugInstr() && !I.isUnconditionalBranch()) + Redundant = false; + } + if (Redundant) { + MachineBasicBlock *Succ = *MBB.succ_begin(); + SmallVector Preds(MBB.predecessors()); + for (auto P : Preds) { + P->replaceSuccessor(&MBB, Succ); + MachineBasicBlock::iterator I(P->getFirstInstrTerminator()); + while (I != P->end()) { + if (I->isBranch()) { + if (TII->getBranchDestBlock(*I) == &MBB) { + I->getOperand(0).setMBB(Succ); + break; + } + } + I++; + } + if (I == P->end()) { + MachineFunction *MF = P->getParent(); + MachineFunction::iterator InsertPt = + P->getNextNode() ? 
MachineFunction::iterator(P->getNextNode()) + : MF->end(); + MF->splice(InsertPt, Succ); + } + } + MBB.removeSuccessor(Succ); + if (LIS) { + for (auto &I : MBB.instrs()) + LIS->RemoveMachineInstrFromMaps(I); + } + MBB.clear(); + MBB.eraseFromParent(); + return true; + } + return false; +} + bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 9548c0f3d9c4a..8f718ce6cb466 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -86,13 +86,9 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) { - // It's possible to have only one component of a super-reg defined by - // a single mov, so we need to clear any subregister flag. - Src0.setSubReg(0); Src0.ChangeToImmediate(MovSrc.getImm()); ConstantFolded = true; } else if (MovSrc.isFI()) { - Src0.setSubReg(0); Src0.ChangeToFrameIndex(MovSrc.getIndex()); ConstantFolded = true; } else if (MovSrc.isGlobal()) { diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index eda41e8eef065..75543093bcbfe 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -4382,6 +4382,10 @@ let Predicates = [HasMVEInt] in { // vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles. 
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>; +def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 4; +}]>; + let Predicates = [HasMVEInt] in { foreach VT = [ v4i1, v8i1, v16i1 ] in { def : Pat<(i32 (predicate_cast (VT VCCR:$src))), @@ -4394,6 +4398,13 @@ let Predicates = [HasMVEInt] in { (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>; } + // If we happen to be casting from a load we can convert that straight + // into a predicate load, so long as the load is of the correct type. + foreach VT = [ v4i1, v8i1, v16i1 ] in { + def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))), + (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>; + } + // Here we match the specific SDNode type 'ARMVectorRegCastImpl' // rather than the more general 'ARMVectorRegCast' which would also // match some bitconverts. If we use the latter in cases where the diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index f3206306a3b60..c789b35f32af5 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1039,13 +1039,28 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) { + // Make operations on i1 relatively expensive as this often involves + // combining predicates. AND and XOR should be easier to handle with IT + // blocks. + switch (ISDOpcode) { + default: + break; + case ISD::AND: + case ISD::XOR: + return 2; + case ISD::OR: + return 3; + } + } + // TODO: Handle more cost kinds. 
if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); - int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->hasNEON()) { diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index db5b5633f6d90..1bad0d11fee4b 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -16,7 +16,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/CodeGen/MachineInstr.h" +#include +#include #include #include #include "BTF.h" @@ -27,9 +28,12 @@ class AsmPrinter; class BTFDebug; class DIType; class GlobalVariable; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MCInst; class MCStreamer; class MCSymbol; -class MachineFunction; /// The base class for BTF type generation. class BTFTypeBase { diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 7b1c8b6079a0e..645d28de2b20d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1862,6 +1862,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST"; case HexagonISD::VALIGN: return "HexagonISD::VALIGN"; case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR"; + case HexagonISD::VPACKL: return "HexagonISD::VPACKL"; case HexagonISD::OP_END: break; } return nullptr; @@ -3014,7 +3015,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR) return LowerINLINEASM(Op, DAG); - if (isHvxOperation(Op.getNode())) { + if (isHvxOperation(Op.getNode(), DAG)) { // If HVX lowering returns nothing, try the default lowering. 
if (SDValue V = LowerHvxOperation(Op, DAG)) return V; @@ -3075,7 +3076,7 @@ void HexagonTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - if (isHvxOperation(N)) { + if (isHvxOperation(N, DAG)) { LowerHvxOperationWrapper(N, Results, DAG); if (!Results.empty()) return; @@ -3094,7 +3095,7 @@ void HexagonTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - if (isHvxOperation(N)) { + if (isHvxOperation(N, DAG)) { ReplaceHvxNodeResults(N, Results, DAG); if (!Results.empty()) return; @@ -3123,7 +3124,7 @@ HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.isBeforeLegalizeOps()) return SDValue(); - if (isHvxOperation(N)) { + if (isHvxOperation(N, DCI.DAG)) { if (SDValue V = PerformHvxDAGCombine(N, DCI)) return V; return SDValue(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index d7a960fde0a20..8473515b3c758 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -30,465 +30,471 @@ namespace llvm { namespace HexagonISD { - enum NodeType : unsigned { - OP_BEGIN = ISD::BUILTIN_OP_END, - - CONST32 = OP_BEGIN, - CONST32_GP, // For marking data present in GP. - ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). - SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). - ALLOCA, - - AT_GOT, // Index in GOT. - AT_PCREL, // Offset relative to PC. - - CALL, // Function call. - CALLnr, // Function call that does not return. - CALLR, - - RET_FLAG, // Return with a flag operand. - BARRIER, // Memory barrier. - JT, // Jump table. - CP, // Constant pool. - - COMBINE, - VSPLAT, // Generic splat, selection depends on argument/return - // types. 
- VASL, - VASR, - VLSR, - - TSTBIT, - INSERT, - EXTRACTU, - VEXTRACTW, - VINSERTW0, - VROR, - TC_RETURN, - EH_RETURN, - DCFETCH, - READCYCLE, - PTRUE, - PFALSE, - D2P, // Convert 8-byte value to 8-bit predicate register. [*] - P2D, // Convert 8-bit predicate register to 8-byte value. [*] - V2Q, // Convert HVX vector to a vector predicate reg. [*] - Q2V, // Convert vector predicate to an HVX vector. [*] - // [*] The equivalence is defined as "Q <=> (V != 0)", - // where the != operation compares bytes. - // Note: V != 0 is implemented as V >u 0. - QCAT, - QTRUE, - QFALSE, - VZERO, - VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type. - TYPECAST, // No-op that's used to convert between different legal - // types in a register. - VALIGN, // Align two vectors (in Op0, Op1) to one that would have - // been loaded from address in Op2. - VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is - // an address in a vector load, then it's a no-op. - OP_END - }; +enum NodeType : unsigned { + OP_BEGIN = ISD::BUILTIN_OP_END, + + CONST32 = OP_BEGIN, + CONST32_GP, // For marking data present in GP. + ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). + SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). + ALLOCA, + + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. + + CALL, // Function call. + CALLnr, // Function call that does not return. + CALLR, + + RET_FLAG, // Return with a flag operand. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. + + COMBINE, + VSPLAT, // Generic splat, selection depends on argument/return + // types. + VASL, + VASR, + VLSR, + + TSTBIT, + INSERT, + EXTRACTU, + VEXTRACTW, + VINSERTW0, + VROR, + TC_RETURN, + EH_RETURN, + DCFETCH, + READCYCLE, + PTRUE, + PFALSE, + D2P, // Convert 8-byte value to 8-bit predicate register. [*] + P2D, // Convert 8-bit predicate register to 8-byte value. [*] + V2Q, // Convert HVX vector to a vector predicate reg. 
[*] + Q2V, // Convert vector predicate to an HVX vector. [*] + // [*] The equivalence is defined as "Q <=> (V != 0)", + // where the != operation compares bytes. + // Note: V != 0 is implemented as V >u 0. + QCAT, + QTRUE, + QFALSE, + VZERO, + VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type. + TYPECAST, // No-op that's used to convert between different legal + // types in a register. + VALIGN, // Align two vectors (in Op0, Op1) to one that would have + // been loaded from address in Op2. + VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is + // an address in a vector load, then it's a no-op. + VPACKL, // Pack low parts of the input vector to the front of the + // output. For example v64i16 VPACKL(v32i32) will pick + // the low halfwords and pack them into the first 32 + // halfwords of the output. The rest of the output is + // unspecified. + OP_END +}; } // end namespace HexagonISD - class HexagonSubtarget; - - class HexagonTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. - const HexagonTargetMachine &HTM; - const HexagonSubtarget &Subtarget; - - bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) - const; - - public: - explicit HexagonTargetLowering(const TargetMachine &TM, - const HexagonSubtarget &ST); - - bool isHVXVectorType(MVT Ty) const; - - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Targets which want to do tail call - /// optimization should implement this function. 
- bool IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, - bool isCallerStructRet, const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - MachineFunction &MF, - unsigned Intrinsic) const override; - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isCheapToSpeculateCttz() const override { return true; } - bool isCheapToSpeculateCtlz() const override { return true; } - bool isCtlzFast() const override { return true; } - - bool hasBitTest(SDValue X, SDValue Y) const override; - - bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; - - /// Return true if an FMA operation is faster than a pair of mul and add - /// instructions. fmuladd intrinsics will be expanded to FMAs when this - /// method returns true (and FMAs are legal), otherwise fmuladd is - /// expanded to mul + add. - bool isFMAFasterThanFMulAndFAdd(const MachineFunction &, - EVT) const override; - - // Should we expand the build vector with shuffles? 
- bool shouldExpandBuildVectorWithShuffles(EVT VT, - unsigned DefinedValues) const override; - - bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override; - TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) - const override; - - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const override; - void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const override; - - const char *getTargetNodeName(unsigned Opcode) const override; - - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; - SDValue 
LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const override; - SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; - SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; - SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; - SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, - GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT, - unsigned ReturnReg, unsigned char OperandFlags) const; - SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const override; - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - const SmallVectorImpl &OutVals, - SDValue Callee) const; - - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - - bool CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const override; - - SDValue LowerReturn(SDValue Chain, CallingConv::ID 
CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SDLoc &dl, SelectionDAG &DAG) const override; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - - bool mayBeEmittedAsTailCall(const CallInst *CI) const override; - - Register getRegisterByName(const char* RegName, LLT VT, - const MachineFunction &MF) const override; - - /// If a physical register, this returns the register that receives the - /// exception address on entry to an EH pad. - Register - getExceptionPointerRegister(const Constant *PersonalityFn) const override { - return Hexagon::R0; - } - - /// If a physical register, this returns the register that receives the - /// exception typeid on entry to a landing pad. - Register - getExceptionSelectorRegister(const Constant *PersonalityFn) const override { - return Hexagon::R1; - } - - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - - EVT getSetCCResultType(const DataLayout &, LLVMContext &C, - EVT VT) const override { - if (!VT.isVector()) - return MVT::i1; - else - return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); - } - - bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const override; - - ConstraintType getConstraintType(StringRef Constraint) const override; - - std::pair - getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - StringRef Constraint, MVT VT) const override; - - unsigned - getInlineAsmMemConstraint(StringRef ConstraintCode) const override { - if (ConstraintCode == "o") - return InlineAsm::Constraint_o; - return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); - } - - // Intrinsics - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 
- SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; - /// isLegalAddressingMode - Return true if the addressing mode represented - /// by AM is legal for this target, for a load/store of the specified type. - /// The type may be VoidTy, in which case only return true if the addressing - /// mode is legal for a load/store of any legal type. - /// TODO: Handle pre/postinc as well. - bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, - Type *Ty, unsigned AS, - Instruction *I = nullptr) const override; - /// Return true if folding a constant offset with the given GlobalAddress - /// is legal. It is frequently not legal in PIC relocation models. - bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; - - /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can - /// compare a register against the immediate without having to materialize - /// the immediate into a register. - bool isLegalICmpImmediate(int64_t Imm) const override; - - EVT getOptimalMemOpType(const MemOp &Op, - const AttributeList &FuncAttributes) const override; - - bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, - unsigned AddrSpace, Align Alignment, - MachineMemOperand::Flags Flags, - bool *Fast) const override; - - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) - const override; - - /// Returns relocation base for the given PIC jumptable. - SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) - const override; - - bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, - EVT NewVT) const override; - - // Handling of atomic RMW instructions. 
- Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, - AtomicOrdering Ord) const override; - Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, - Value *Addr, AtomicOrdering Ord) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; - - AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { - return AtomicExpansionKind::LLSC; - } - - private: - void initializeHVXLowering(); - unsigned getPreferredHvxVectorAction(MVT VecTy) const; - - void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl, - unsigned NeedAlign) const; - - std::pair getBaseAndOffset(SDValue Addr) const; - - bool getBuildVectorConstInts(ArrayRef Values, MVT VecTy, - SelectionDAG &DAG, - MutableArrayRef Consts) const; - SDValue buildVector32(ArrayRef Elem, const SDLoc &dl, MVT VecTy, +class HexagonSubtarget; + +class HexagonTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. + const HexagonTargetMachine &HTM; + const HexagonSubtarget &Subtarget; + + bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) + const; + +public: + explicit HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST); + + bool isHVXVectorType(MVT Ty) const; + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. 
+ bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, + bool isCallerStructRet, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG& DAG) const; + + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; + + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + + bool isCheapToSpeculateCttz() const override { return true; } + bool isCheapToSpeculateCtlz() const override { return true; } + bool isCtlzFast() const override { return true; } + + bool hasBitTest(SDValue X, SDValue Y) const override; + + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + + /// Return true if an FMA operation is faster than a pair of mul and add + /// instructions. fmuladd intrinsics will be expanded to FMAs when this + /// method returns true (and FMAs are legal), otherwise fmuladd is + /// expanded to mul + add. + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &, + EVT) const override; + + // Should we expand the build vector with shuffles? 
+ bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + + bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override; + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) + const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue 
LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT, + unsigned ReturnReg, unsigned char OperandFlags) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + const SmallVectorImpl &OutVals, + SDValue Callee) const; + + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + + bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID 
CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + + Register getRegisterByName(const char* RegName, LLT VT, + const MachineFunction &MF) const override; + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + Register + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, + EVT VT) const override { + if (!VT.isVector()) + return MVT::i1; + else + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + } + + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + + ConstraintType getConstraintType(StringRef Constraint) const override; + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + // Intrinsics + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS, + Instruction *I = nullptr) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; + + bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace, Align Alignment, + MachineMemOperand::Flags Flags, + bool *Fast) const override; + + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) + const override; + + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + + bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, + EVT NewVT) const override; + + // Handling of atomic RMW instructions. 
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind + shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; + } + +private: + void initializeHVXLowering(); + unsigned getPreferredHvxVectorAction(MVT VecTy) const; + + void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl, + unsigned NeedAlign) const; + + std::pair getBaseAndOffset(SDValue Addr) const; + + bool getBuildVectorConstInts(ArrayRef Values, MVT VecTy, + SelectionDAG &DAG, + MutableArrayRef Consts) const; + SDValue buildVector32(ArrayRef Elem, const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const; + SDValue buildVector64(ArrayRef Elem, const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const; + SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ValTy, MVT ResTy, SelectionDAG &DAG) const; + SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, + const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; + SDValue expandPredicate(SDValue Vec32, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue buildVector64(ArrayRef Elem, const SDLoc &dl, MVT VecTy, - SelectionDAG &DAG) const; - SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl, - MVT ValTy, MVT ResTy, SelectionDAG &DAG) const; - SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, - const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; - SDValue expandPredicate(SDValue Vec32, const SDLoc &dl, + SDValue contractPredicate(SDValue Vec64, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue contractPredicate(SDValue Vec64, const SDLoc &dl, + 
SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const; + + bool isUndef(SDValue Op) const { + if (Op.isMachineOpcode()) + return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; + return Op.getOpcode() == ISD::UNDEF; + } + SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty, + ArrayRef Ops, SelectionDAG &DAG) const { + SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); + return SDValue(N, 0); + } + SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const; + + using VectorPair = std::pair; + using TypePair = std::pair; + + SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, + const SDLoc &dl, SelectionDAG &DAG) const; + + MVT ty(SDValue Op) const { + return Op.getValueType().getSimpleVT(); + } + TypePair ty(const VectorPair &Ops) const { + return { Ops.first.getValueType().getSimpleVT(), + Ops.second.getValueType().getSimpleVT() }; + } + MVT tyScalar(MVT Ty) const { + if (!Ty.isVector()) + return Ty; + return MVT::getIntegerVT(Ty.getSizeInBits()); + } + MVT tyVector(MVT Ty, MVT ElemTy) const { + if (Ty.isVector() && Ty.getVectorElementType() == ElemTy) + return Ty; + unsigned TyWidth = Ty.getSizeInBits(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert((TyWidth % ElemWidth) == 0); + return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); + } + + MVT typeJoin(const TypePair &Tys) const; + TypePair typeSplit(MVT Ty) const; + MVT typeExtElem(MVT VecTy, unsigned Factor) const; + MVT typeTruncElem(MVT VecTy, unsigned Factor) const; + + SDValue opJoin(const VectorPair &Ops, const SDLoc &dl, + SelectionDAG &DAG) const; + VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; + SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; + + bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags, + bool *Fast) const; + bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, + MachineMemOperand::Flags Flags, + bool *Fast) const; + + bool isHvxSingleTy(MVT Ty) const; + bool isHvxPairTy(MVT 
Ty) const; + bool isHvxBoolTy(MVT Ty) const; + SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, + SelectionDAG &DAG) const; + SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; + SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, + ArrayRef Mask, SelectionDAG &DAG) const; + + SDValue buildHvxVectorReg(ArrayRef Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl, + unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const; - SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const; - - bool isUndef(SDValue Op) const { - if (Op.isMachineOpcode()) - return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; - return Op.getOpcode() == ISD::UNDEF; - } - SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty, - ArrayRef Ops, SelectionDAG &DAG) const { - SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); - return SDValue(N, 0); - } - SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const; - - using VectorPair = std::pair; - using TypePair = std::pair; - - SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, - const SDLoc &dl, SelectionDAG &DAG) const; - - MVT ty(SDValue Op) const { - return Op.getValueType().getSimpleVT(); - } - TypePair ty(const VectorPair &Ops) const { - return { Ops.first.getValueType().getSimpleVT(), - Ops.second.getValueType().getSimpleVT() }; - } - MVT tyScalar(MVT Ty) const { - if (!Ty.isVector()) - return Ty; - return MVT::getIntegerVT(Ty.getSizeInBits()); - } - MVT tyVector(MVT Ty, MVT ElemTy) const { - if (Ty.isVector() && Ty.getVectorElementType() == ElemTy) - return Ty; - unsigned TyWidth = Ty.getSizeInBits(); - unsigned ElemWidth = ElemTy.getSizeInBits(); - assert((TyWidth % ElemWidth) == 0); - return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); - } - - MVT typeJoin(const TypePair &Tys) 
const; - TypePair typeSplit(MVT Ty) const; - MVT typeExtElem(MVT VecTy, unsigned Factor) const; - MVT typeTruncElem(MVT VecTy, unsigned Factor) const; - - SDValue opJoin(const VectorPair &Ops, const SDLoc &dl, - SelectionDAG &DAG) const; - VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; - - bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags, - bool *Fast) const; - bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, - MachineMemOperand::Flags Flags, - bool *Fast) const; - - bool isHvxSingleTy(MVT Ty) const; - bool isHvxPairTy(MVT Ty) const; - bool isHvxBoolTy(MVT Ty) const; - SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, - SelectionDAG &DAG) const; - SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; - SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, - ArrayRef Mask, SelectionDAG &DAG) const; - - SDValue buildHvxVectorReg(ArrayRef Values, const SDLoc &dl, - MVT VecTy, SelectionDAG &DAG) const; - SDValue buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, - MVT VecTy, SelectionDAG &DAG) const; - SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl, - unsigned BitBytes, bool ZeroFill, - SelectionDAG &DAG) const; - SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, + SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const; - SDValue extractHvxElementPred(SDValue 
VecV, SDValue IdxV, const SDLoc &dl, + SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const; - SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, + SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, + SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, - MVT ResTy, SelectionDAG &DAG) const; - SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, - MVT ResTy, SelectionDAG &DAG) const; - SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, - const SDLoc &dl, SelectionDAG &DAG) const; - SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, - const SDLoc &dl, SelectionDAG &DAG) const; - SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, - bool ZeroExt, SelectionDAG &DAG) const; - SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, - SelectionDAG &DAG) const; + SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, + bool ZeroExt, SelectionDAG &DAG) const; + SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, + SelectionDAG &DAG) const; - SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const; - SDValue 
LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const; - - SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; - SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; - SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; - - std::pair - findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) - const override; - - bool isHvxOperation(SDNode *N) const; - SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; - void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; - void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; - SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - }; + SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxCttz(SDValue Op, 
SelectionDAG &DAG) const; + SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const; + + SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; + SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; + SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; + SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const; + + std::pair + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; + + bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const; + SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; + void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; + void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; + SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; +}; } // end namespace llvm diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index ed701728892ad..e5d05cfe64c47 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -233,8 +233,10 @@ HexagonTargetLowering::initializeHVXLowering() { for (int N = 2; N < MaxElems; N *= 2) { MVT VecTy = MVT::getVectorVT(ElemTy, N); auto Action = getPreferredVectorAction(VecTy); - if (Action == TargetLoweringBase::TypeWidenVector) + if (Action == TargetLoweringBase::TypeWidenVector) { setOperationAction(ISD::STORE, VecTy, Custom); + setOperationAction(ISD::TRUNCATE, VecTy, Custom); + } } } @@ -629,6 +631,9 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, 
const SDLoc &dl, if (!ZeroFill) return S; // Fill the bytes beyond BlockLen with 0s. + // V6_pred_scalar2 cannot fill the entire predicate, so it only works + // when BlockLen < HwLen. + assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); @@ -1092,6 +1097,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, // ByteVec is the target vector VecV rotated in such a way that the // subvector should be inserted at index 0. Generate a predicate mask // and use vmux to do the insertion. + assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); @@ -1702,18 +1708,19 @@ SDValue HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); unsigned HwLen = Subtarget.getVectorLength(); + MachineFunction &MF = DAG.getMachineFunction(); auto *MaskN = cast(Op.getNode()); SDValue Mask = MaskN->getMask(); SDValue Chain = MaskN->getChain(); SDValue Base = MaskN->getBasePtr(); - auto *MemOp = MaskN->getMemOperand(); + auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen); unsigned Opc = Op->getOpcode(); assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE); if (Opc == ISD::MLOAD) { MVT ValTy = ty(Op); - SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MaskN->getMemOperand()); + SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp); SDValue Thru = cast(MaskN)->getPassThru(); if (isUndef(Thru)) return Load; @@ -1903,6 +1910,7 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { } assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia + assert(ValueLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue StoreQ = 
getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG); @@ -1912,6 +1920,37 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { MOp, ISD::UNINDEXED, false, false); } +SDValue +HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { + const SDLoc &dl(Op); + unsigned HwWidth = 8*Subtarget.getVectorLength(); + + auto getFactor = [HwWidth](MVT Ty) { + unsigned Width = Ty.getSizeInBits(); + assert(HwWidth % Width == 0); + return HwWidth / Width; + }; + + auto getWideTy = [getFactor](MVT Ty) { + unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); + return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); + }; + + SDValue Op0 = Op.getOperand(0); + MVT ResTy = ty(Op); + MVT OpTy = ty(Op0); + if (Subtarget.isHVXVectorType(OpTy)) + return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0); + + MVT WideOpTy = getWideTy(OpTy); + SmallVector Concats = {Op0}; + for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i) + Concats.push_back(DAG.getUNDEF(OpTy)); + + SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats); + return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat); +} + SDValue HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); @@ -2020,7 +2059,14 @@ void HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { unsigned Opc = N->getOpcode(); + SDValue Op(N, 0); switch (Opc) { + case ISD::TRUNCATE: + if (!Subtarget.isHVXVectorType(ty(Op), false)) { + SDValue T = WidenHvxTruncate(Op, DAG); + Results.push_back(T); + } + break; case ISD::BITCAST: if (isHvxBoolTy(ty(N->getOperand(0)))) { SDValue Op(N, 0); @@ -2058,25 +2104,38 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) } bool -HexagonTargetLowering::isHvxOperation(SDNode *N) const { - if (N->getOpcode() == ISD::STORE) { - // If it's a 
store-to-be-widened, treat it as an HVX operation. - SDValue Val = cast(N)->getValue(); - MVT ValTy = ty(Val); - if (ValTy.isVector()) { - auto Action = getPreferredVectorAction(ValTy); - if (Action == TargetLoweringBase::TypeWidenVector) - return true; - } - } +HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { // If the type of any result, or any operand type are HVX vector types, // this is an HVX operation. - auto IsHvxTy = [this] (EVT Ty) { + auto IsHvxTy = [this](EVT Ty) { return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); }; auto IsHvxOp = [this](SDValue Op) { return Op.getValueType().isSimple() && Subtarget.isHVXVectorType(ty(Op), true); }; - return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp); + if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp)) + return true; + + // Check if this could be an HVX operation after type widening. + auto IsWidenedToHvx = [this, &DAG](SDValue Op) { + if (!Op.getValueType().isSimple()) + return false; + MVT ValTy = ty(Op); + if (ValTy.isVector()) { + auto Action = getPreferredVectorAction(ValTy); + if (Action == TargetLoweringBase::TypeWidenVector) { + EVT WideTy = getTypeToTransformTo(*DAG.getContext(), ValTy); + assert(WideTy.isSimple()); + return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); + } + } + return false; + }; + + for (int i = 0, e = N->getNumValues(); i != e; ++i) { + if (IsWidenedToHvx(SDValue(N, i))) + return true; + } + return llvm::any_of(N->ops(), IsWidenedToHvx); } diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 0e5772bd690f2..b656a845b1526 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -1,3 +1,6 @@ +def SDTVecUnaryOp: + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; + def SDTVecBinOp: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>; @@ -37,6 +40,7 
@@ def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>; def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>; def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>; +def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>; def vzero: PatFrag<(ops), (HexagonVZERO)>; def qtrue: PatFrag<(ops), (HexagonQTRUE)>; @@ -44,7 +48,8 @@ def qfalse: PatFrag<(ops), (HexagonQFALSE)>; def qcat: PatFrag<(ops node:$Qs, node:$Qt), (HexagonQCAT node:$Qs, node:$Qt)>; -def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; +def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; +def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>; def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>; def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; @@ -401,6 +406,10 @@ let Predicates = [UseHVX] in { def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>; + def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w HvxVR:$Vs, (IMPLICIT_DEF))>; + def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>; + def: Pat<(VecI16 (bswap HVI16:$Vs)), (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>; def: Pat<(VecI32 (bswap HVI32:$Vs)), diff --git a/llvm/lib/Target/Mips/MipsCallLowering.h b/llvm/lib/Target/Mips/MipsCallLowering.h index a284cf5e26cf5..6e43e55cee9b1 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.h +++ b/llvm/lib/Target/Mips/MipsCallLowering.h @@ -18,6 +18,7 @@ namespace llvm { +class MachineMemOperand; class MipsTargetLowering; class MipsCallLowering : public CallLowering { diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 0c5df4ba1bade..03933d8205766 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -40,8 +40,6 @@ namespace llvm { 
class Argument; -class CCState; -class CCValAssign; class FastISel; class FunctionLoweringInfo; class MachineBasicBlock; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4ffb35dda4fa1..b213abb57aa83 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -262,6 +262,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); @@ -860,7 +862,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { + if (Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -1234,12 +1236,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::SELECT_CC); } - // Use reciprocal estimates. 
- if (TM.Options.UnsafeFPMath) { - setTargetDAGCombine(ISD::FDIV); - setTargetDAGCombine(ISD::FSQRT); - } - if (Subtarget.hasP9Altivec()) { setTargetDAGCombine(ISD::ABS); setTargetDAGCombine(ISD::VSELECT); @@ -1511,6 +1507,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; + case PPCISD::STRICT_FADDRTZ: + return "PPCISD::STRICT_FADDRTZ"; case PPCISD::STRICT_FCTIDZ: return "PPCISD::STRICT_FCTIDZ"; case PPCISD::STRICT_FCTIWZ: @@ -8170,38 +8168,86 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + // FP to INT conversions are legal for f128. - if (Src.getValueType() == MVT::f128) + if (SrcVT == MVT::f128) return Op; // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). - if (Src.getValueType() == MVT::ppcf128 && !IsStrict) { - if (Op.getValueType() == MVT::i32) { + if (SrcVT == MVT::ppcf128) { + if (DstVT == MVT::i32) { + // TODO: Conservatively pass only nofpexcept flag here. Need to check and + // set other fast-math flags to FP operations in both strict and + // non-strict cases. (FP_TO_SINT, FSUB) + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + if (IsSigned) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(1, dl)); - // Add the two halves of the long double in round-to-zero mode. - SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); - - // Now use a smaller FP_TO_SINT. 
- return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + // Add the two halves of the long double in round-to-zero mode, and use + // a smaller FP_TO_SINT. + if (IsStrict) { + SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, + DAG.getVTList(MVT::f64, MVT::Other), + {Op.getOperand(0), Lo, Hi}, Flags); + return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(MVT::i32, MVT::Other), + {Res.getValue(1), Res}, Flags); + } else { + SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } } else { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); - // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X - // FIXME: generated code sucks. - // TODO: Are there fast-math-flags to propagate to this FSUB? - SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp); - True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); - True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, - DAG.getConstant(0x80000000, dl, MVT::i32)); - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); - return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE); + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); + SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT); + if (IsStrict) { + // Sel = Src < 0x80000000 + // FltOfs = select Sel, 0.0, 0x80000000 + // IntOfs = select Sel, 0, 0x80000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs + SDValue Chain = Op.getOperand(0); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); + SDValue Sel = + DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); + Chain = Sel.getValue(1); + + SDValue FltOfs = DAG.getSelect( + dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = 
DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, + DAG.getVTList(SrcVT, MVT::Other), + {Chain, Src, FltOfs}, Flags); + Chain = Val.getValue(1); + SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(DstVT, MVT::Other), + {Chain, Val}, Flags); + Chain = SInt.getValue(1); + SDValue IntOfs = DAG.getSelect( + dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask); + SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); + return DAG.getMergeValues({Result, Chain}, dl); + } else { + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. + SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst); + True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); + True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask); + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE); + } } } @@ -12176,7 +12222,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(PPC::RM, RegState::ImplicitDefine); // Perform addition. - BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) + .addReg(Src1) + .addReg(Src2); + if (MI.getFlag(MachineInstr::NoFPExcept)) + MIB.setMIFlag(MachineInstr::NoFPExcept); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index f51cd2823fcdd..05c9a5d314133 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -453,6 +453,9 @@ namespace llvm { STRICT_FCFIDS, STRICT_FCFIDUS, + /// Constrained floating point add in round-to-zero mode. + STRICT_FADDRTZ, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. 
It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 04ecb72a5ccd5..a6932005d5ad1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -182,7 +182,12 @@ def PPCmffs : SDNode<"PPCISD::MFFS", // Perform FADD in round-to-zero mode. def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; +def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, + [SDNPHasChain]>; +def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), + [(PPCfaddrtz node:$lhs, node:$rhs), + (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -2960,9 +2965,9 @@ def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>; let Predicates = [HasFPU] in { // Custom inserter instruction to perform FADD in round-to-zero mode. -let Uses = [RM] in { +let Uses = [RM], mayRaiseFPException = 1 in { def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", - [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>; } // The above pseudo gets expanded to make use of the following instructions diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 81455adbd0b7b..73321dec99d37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1003,19 +1003,24 @@ let Predicates = [IsISA3_1] in { (int_ppc_altivec_vextractqm v1i128:$vB))]>; def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandbm $vD, $vB", IIC_VecGeneral, - []>; + [(set v16i8:$vD, (int_ppc_altivec_vexpandbm + v16i8:$vB))]>; def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandhm $vD, $vB", IIC_VecGeneral, - []>; + [(set v8i16:$vD, 
(int_ppc_altivec_vexpandhm + v8i16:$vB))]>; def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandwm $vD, $vB", IIC_VecGeneral, - []>; + [(set v4i32:$vD, (int_ppc_altivec_vexpandwm + v4i32:$vB))]>; def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB), "vexpanddm $vD, $vB", IIC_VecGeneral, - []>; + [(set v2i64:$vD, (int_ppc_altivec_vexpanddm + v2i64:$vB))]>; def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandqm $vD, $vB", IIC_VecGeneral, - []>; + [(set v1i128:$vD, (int_ppc_altivec_vexpandqm + v1i128:$vB))]>; def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB), "mtvsrbm $vD, $rB", IIC_VecGeneral, []>; @@ -1256,16 +1261,25 @@ let Predicates = [IsISA3_1] in { } def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>; + "vmulesd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA, + v2i64:$vB))]>; def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>; + "vmuleud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA, + v2i64:$vB))]>; def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>; + "vmulosd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA, + v2i64:$vB))]>; def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>; - def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>; + "vmuloud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA, + v2i64:$vB))]>; + def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, + [(set 
v1i128:$vD, (int_ppc_altivec_vmsumcud + v2i64:$vA, v2i64:$vB, v1i128:$vC))]>; def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>; def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index d2aba6bd6e8de..227c863685ae9 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -1555,6 +1555,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); MI.getOperand(2).setImm(NewSH); MI.getOperand(3).setImm(NewMB); + MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill()); + SrcMI->getOperand(1).setIsKill(false); LLVM_DEBUG(dbgs() << "To: "); LLVM_DEBUG(MI.dump()); diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 4c9013aa1e234..6c78c47e8551d 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -64,7 +64,7 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { } // end anonymous namespace char RISCVMergeBaseOffsetOpt::ID = 0; -INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset", +INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE, RISCV_MERGE_BASE_OFFSET_NAME, false, false) // Detect the pattern: diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp index e9d3aaeb9cfe2..6ad6940c6b51b 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.cpp +++ b/llvm/lib/Target/Sparc/LeonPasses.cpp @@ -10,14 +10,13 @@ //===----------------------------------------------------------------------===// #include "LeonPasses.h" -#include "llvm/CodeGen/ISDOpcodes.h" +#include "SparcSubtarget.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" 
#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h index b165bc93780f6..9bc4569a12984 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.h +++ b/llvm/lib/Target/Sparc/LeonPasses.h @@ -12,14 +12,11 @@ #ifndef LLVM_LIB_TARGET_SPARC_LEON_PASSES_H #define LLVM_LIB_TARGET_SPARC_LEON_PASSES_H -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" - -#include "Sparc.h" -#include "SparcSubtarget.h" namespace llvm { +class SparcSubtarget; + class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass : public MachineFunctionPass { protected: @@ -33,13 +30,11 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass protected: LEONMachineFunctionPass(char &ID); - int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex); void clearUsedRegisterList() { UsedRegisters.clear(); } void markRegisterUsed(int registerIndex) { UsedRegisters.push_back(registerIndex); } - int getUnusedFPRegister(MachineRegisterInfo &MRI); }; class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 822b64d3de105..8f5b7301e6532 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -620,8 +620,8 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, } bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { - MVT ExtT = ExtVal.getSimpleValueType(); - MVT MemT = cast(ExtVal->getOperand(0))->getSimpleValueType(0); + EVT ExtT = ExtVal.getValueType(); + EVT MemT = 
cast(ExtVal->getOperand(0))->getValueType(0); return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || (ExtT == MVT::v2i64 && MemT == MVT::v2i32); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index 337077178557d..8fa794c0b932e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -15,11 +15,10 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" - namespace llvm { +class MachineInstr; +class MachineOperand; class WebAssemblyFunctionInfo; namespace WebAssembly { diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index aa03217d155d5..75b2368ce1850 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -448,7 +448,7 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; unsigned Index; diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 9f1fece1b9dd8..fdc65acffe3d8 100644 --- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -154,7 +154,7 @@ static bool isPotentialBlockedMemCpyLd(unsigned Opcode) { return isXMMLoadOpcode(Opcode) || isYMMLoadOpcode(Opcode); } -static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) { +static bool isPotentialBlockedMemCpyPair(unsigned LdOpcode, unsigned StOpcode) { switch (LdOpcode) { case X86::MOVUPSrm: 
case X86::MOVAPSrm: @@ -206,7 +206,7 @@ static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) { } } -static bool isPotentialBlockingStoreInst(int Opcode, int LoadOpcode) { +static bool isPotentialBlockingStoreInst(unsigned Opcode, unsigned LoadOpcode) { bool PBlock = false; PBlock |= Opcode == X86::MOV64mr || Opcode == X86::MOV64mi32 || Opcode == X86::MOV32mr || Opcode == X86::MOV32mi || diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp index 540ad98b6d54f..8155ce3d0bb66 100644 --- a/llvm/lib/Target/X86/X86EvexToVex.cpp +++ b/llvm/lib/Target/X86/X86EvexToVex.cpp @@ -250,7 +250,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const { (Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable) : makeArrayRef(X86EvexToVex128CompressTable); - auto I = llvm::lower_bound(Table, MI.getOpcode()); + const auto *I = llvm::lower_bound(Table, MI.getOpcode()); if (I == Table.end() || I->EvexOpcode != MI.getOpcode()) return false; diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index 78de041329e20..f8d822aebc5b6 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -187,8 +187,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { /// If so, return that super register in \p SuperDestReg. bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, Register &SuperDestReg) const { - auto *TRI = &TII->getRegisterInfo(); - + const X86RegisterInfo *TRI = &TII->getRegisterInfo(); Register OrigDestReg = OrigMI->getOperand(0).getReg(); SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); @@ -320,7 +319,7 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { // This is only correct if we access the same subregister index: otherwise, // we could try to replace "movb %ah, %al" with "movl %eax, %eax". 
- auto *TRI = &TII->getRegisterInfo(); + const X86RegisterInfo *TRI = &TII->getRegisterInfo(); if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) != TRI->getSubRegIndex(NewDestReg, OldDest.getReg())) return nullptr; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index c7ca6fb2a4fcf..7437c2e978af2 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -490,9 +490,9 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( } const MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const unsigned FramePtr = TRI->getFrameRegister(MF); - const unsigned MachineFramePtr = - STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64)) + const Register FramePtr = TRI->getFrameRegister(MF); + const Register MachineFramePtr = + STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true); // Offset = space for return address + size of the frame pointer itself. @@ -1788,7 +1788,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. const bool Is64BitILP32 = STI.isTarget64BitILP32(); Register FramePtr = TRI->getFrameRegister(MF); - unsigned MachineFramePtr = + Register MachineFramePtr = Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index ee61787170fc1..840f132ec6664 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3379,7 +3379,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { // Match the shift amount as: (bitwidth - y). It should go away, too. 
if (ShiftAmt.getOpcode() != ISD::SUB) return false; - auto V0 = dyn_cast(ShiftAmt.getOperand(0)); + auto *V0 = dyn_cast(ShiftAmt.getOperand(0)); if (!V0 || V0->getZExtValue() != Bitwidth) return false; NBits = ShiftAmt.getOperand(1); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 674e3d88ae890..ad8704f686c16 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -195,6 +195,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ABS , MVT::i32 , Custom); } setOperationAction(ISD::ABS , MVT::i64 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i128 , Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -7452,8 +7454,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, } Ops.push_back(Src); Ops.append(SubInputs.begin(), SubInputs.end()); - for (int i = 0; i != (int)NumElts; ++i) - Mask.push_back(i); + if (ISD::isBuildVectorAllZeros(Src.getNode())) + Mask.append(NumElts, SM_SentinelZero); + else + for (int i = 0; i != (int)NumElts; ++i) + Mask.push_back(i); for (int i = 0; i != (int)NumSubElts; ++i) { int M = SubMask[i]; if (0 <= M) { @@ -10888,20 +10893,25 @@ static bool isTargetShuffleEquivalent(ArrayRef Mask, // Attempt to create a shuffle mask from a VSELECT condition mask. 
static bool createShuffleMaskFromVSELECT(SmallVectorImpl &Mask, SDValue Cond) { - if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) + EVT CondVT = Cond.getValueType(); + unsigned EltSizeInBits = CondVT.getScalarSizeInBits(); + unsigned NumElts = CondVT.getVectorNumElements(); + + APInt UndefElts; + SmallVector EltBits; + if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits, + true, false)) return false; - unsigned Size = Cond.getValueType().getVectorNumElements(); - Mask.resize(Size, SM_SentinelUndef); + Mask.resize(NumElts, SM_SentinelUndef); - for (int i = 0; i != (int)Size; ++i) { - SDValue CondElt = Cond.getOperand(i); + for (int i = 0; i != (int)NumElts; ++i) { Mask[i] = i; // Arbitrarily choose from the 2nd operand if the select condition element // is undef. // TODO: Can we do better by matching patterns such as even/odd? - if (CondElt.isUndef() || isNullConstant(CondElt)) - Mask[i] += Size; + if (UndefElts[i] || EltBits[i].isNullValue()) + Mask[i] += NumElts; } return true; @@ -14946,16 +14956,27 @@ static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + int NumElts = VT.getVectorNumElements(); MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); - SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); + MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, NumElts); + SDValue MaskNode; MVT ShuffleVT = VT; if (!VT.is512BitVector() && !Subtarget.hasVLX()) { V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512); V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512); - MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); ShuffleVT = V1.getSimpleValueType(); + + // Adjust mask to correct indices for the second input. 
+ unsigned Scale = 512 / VT.getSizeInBits(); + SmallVector AdjustedMask(Mask.begin(), Mask.end()); + for (int &M : AdjustedMask) + if (NumElts <= M) + M += (Scale - 1) * NumElts; + MaskNode = getConstVector(AdjustedMask, MaskVecVT, DAG, DL, true); + MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); + } else { + MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); } SDValue Result; @@ -15545,53 +15566,94 @@ static SDValue lowerShuffleAsLanePermuteAndPermute( int NumElts = VT.getVectorNumElements(); int NumLanes = VT.getSizeInBits() / 128; int NumEltsPerLane = NumElts / NumLanes; + bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef(); + + /// Attempts to find a sublane permute with the given size + /// that gets all elements into their target lanes. + /// + /// If successful, fills CrossLaneMask and InLaneMask and returns true. + /// If unsuccessful, returns false and may overwrite InLaneMask. + auto getSublanePermute = [&](int NumSublanes) -> SDValue { + int NumSublanesPerLane = NumSublanes / NumLanes; + int NumEltsPerSublane = NumElts / NumSublanes; + + SmallVector CrossLaneMask; + SmallVector InLaneMask(NumElts, SM_SentinelUndef); + // CrossLaneMask but one entry == one sublane. + SmallVector CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef); - SmallVector SrcLaneMask(NumLanes, SM_SentinelUndef); - SmallVector PermMask(NumElts, SM_SentinelUndef); - - for (int i = 0; i != NumElts; ++i) { - int M = Mask[i]; - if (M < 0) - continue; + for (int i = 0; i != NumElts; ++i) { + int M = Mask[i]; + if (M < 0) + continue; - // Ensure that each lane comes from a single source lane. 
- int SrcLane = M / NumEltsPerLane; - int DstLane = i / NumEltsPerLane; - if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane)) - return SDValue(); - SrcLaneMask[DstLane] = SrcLane; + int SrcSublane = M / NumEltsPerSublane; + int DstLane = i / NumEltsPerLane; - PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane); - } + // We only need to get the elements into the right lane, not sublane. + // So search all sublanes that make up the destination lane. + bool Found = false; + int DstSubStart = DstLane * NumSublanesPerLane; + int DstSubEnd = DstSubStart + NumSublanesPerLane; + for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) { + if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane)) + continue; - // Make sure we set all elements of the lane mask, to avoid undef propagation. - SmallVector LaneMask(NumElts, SM_SentinelUndef); - for (int DstLane = 0; DstLane != NumLanes; ++DstLane) { - int SrcLane = SrcLaneMask[DstLane]; - if (0 <= SrcLane) - for (int j = 0; j != NumEltsPerLane; ++j) { - LaneMask[(DstLane * NumEltsPerLane) + j] = - (SrcLane * NumEltsPerLane) + j; + Found = true; + CrossLaneMaskLarge[DstSublane] = SrcSublane; + int DstSublaneOffset = DstSublane * NumEltsPerSublane; + InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane; + break; } - } + if (!Found) + return SDValue(); + } - // If we're only shuffling a single lowest lane and the rest are identity - // then don't bother. - // TODO - isShuffleMaskInputInPlace could be extended to something like this. - int NumIdentityLanes = 0; - bool OnlyShuffleLowestLane = true; - for (int i = 0; i != NumLanes; ++i) { - if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane, - i * NumEltsPerLane)) - NumIdentityLanes++; - else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes) - OnlyShuffleLowestLane = false; - } - if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) + // Fill CrossLaneMask using CrossLaneMaskLarge. 
+ narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask); + + if (!CanUseSublanes) { + // If we're only shuffling a single lowest lane and the rest are identity + // then don't bother. + // TODO - isShuffleMaskInputInPlace could be extended to something like + // this. + int NumIdentityLanes = 0; + bool OnlyShuffleLowestLane = true; + for (int i = 0; i != NumLanes; ++i) { + int LaneOffset = i * NumEltsPerLane; + if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane, + i * NumEltsPerLane)) + NumIdentityLanes++; + else if (CrossLaneMask[LaneOffset] != 0) + OnlyShuffleLowestLane = false; + } + if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) + return SDValue(); + } + + SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask); + return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT), + InLaneMask); + }; + + // First attempt a solution with full lanes. + if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes)) + return V; + + // The rest of the solutions use sublanes. + if (!CanUseSublanes) return SDValue(); - SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask); - return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask); + // Then attempt a solution with 64-bit sublanes (vpermq). + if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes * 2)) + return V; + + // If that doesn't work and we have fast variable shuffle, + // attempt 32-bit sublanes (vpermd). + if (!Subtarget.hasFastVariableShuffle()) + return SDValue(); + + return getSublanePermute(/*NumSublanes=*/NumLanes * 4); } /// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one @@ -18139,9 +18201,11 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, // Only non-legal VSELECTs reach this lowering, convert those into generic // shuffles and re-use the shuffle lowering path for blends. 
- SmallVector Mask; - if (createShuffleMaskFromVSELECT(Mask, Cond)) - return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); + if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { + SmallVector Mask; + if (createShuffleMaskFromVSELECT(Mask, Cond)) + return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); + } return SDValue(); } @@ -20142,10 +20206,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue Store = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Align(8)); // For i64 source, we need to add the appropriate power of 2 if the input - // was negative. This is the same as the optimization in - // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, - // we must be careful to do the computation in x87 extended precision, not - // in SSE. (The generic code can't know it's OK to do this, or how to.) + // was negative. We must be careful to do the computation in x87 extended + // precision, not in SSE. SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot }; SDValue Fild = @@ -22671,8 +22733,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } // Try to use SUBUS and PCMPEQ. - if (SDValue V = LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG)) - return V; + if (FlipSigns) + if (SDValue V = + LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG)) + return V; // We are handling one of the integer comparisons here. 
Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple @@ -29657,9 +29721,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ABS: { - assert(N->getValueType(0) == MVT::i64 && + assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) && "Unexpected type (!= i64) on ABS."); - MVT HalfT = MVT::i32; + assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) && + "Unexpected type (!= i128) on ABS."); + MVT VT = N->getSimpleValueType(0); + MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32; SDValue Lo, Hi, Tmp; SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); @@ -29675,7 +29742,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue(Lo.getNode(), 1)); Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi)); return; } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. @@ -34863,6 +34930,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2)); if (!isAnyZero(Mask) && !PreferPERMQ) { + if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) + return SDValue(); // Nothing to do! if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG)) return DAG.getBitcast(RootVT, V); } @@ -35178,44 +35247,48 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) { // If we have a single input lane-crossing shuffle then lower to VPERMV. 
- if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros && - ((Subtarget.hasAVX2() && - (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasAVX512() && - (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || - MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); - Res = DAG.getBitcast(MaskVT, V1); - Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); - return DAG.getBitcast(RootVT, Res); + if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros) { + if (Subtarget.hasAVX2() && + (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) { + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); + Res = DAG.getBitcast(MaskVT, V1); + Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); + return DAG.getBitcast(RootVT, Res); + } + // AVX512 variants (non-VLX will pad to 512-bit shuffles). + if ((Subtarget.hasAVX512() && + (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || + MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || + (Subtarget.hasBWI() && + (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && + (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) { + V1 = DAG.getBitcast(MaskVT, V1); + V2 = DAG.getUNDEF(MaskVT); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); + return DAG.getBitcast(RootVT, Res); + } } // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero - // vector as the second source. + // vector as the second source (non-VLX will pad to 512-bit shuffles). 
if (UnaryShuffle && AllowVariableMask && ((Subtarget.hasAVX512() && (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || + MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || + MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasVLX() && - (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || - MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { + (Subtarget.hasBWI() && + (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && + (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { // Adjust shuffle mask - replace SM_SentinelZero with second source index. for (unsigned i = 0; i != NumMaskElts; ++i) if (Mask[i] == SM_SentinelZero) Mask[i] = NumMaskElts + i; - - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); - Res = DAG.getBitcast(MaskVT, V1); - SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL); - Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero); + V1 = DAG.getBitcast(MaskVT, V1); + V2 = getZeroVector(MaskVT, Subtarget, DAG, DL); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } @@ -35226,22 +35299,21 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG, Subtarget)) return WideShuffle; - // If we have a dual input lane-crossing shuffle then lower to VPERMV3. + // If we have a dual input lane-crossing shuffle then lower to VPERMV3, + // (non-VLX will pad to 512-bit shuffles). 
if (AllowVariableMask && !MaskContainsZeros && ((Subtarget.hasAVX512() && (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || - MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasVLX() && - (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || + MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || + MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); + (Subtarget.hasBWI() && + (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && + (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); - Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } return SDValue(); @@ -35398,25 +35470,22 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG, Subtarget)) return WideShuffle; - // If we have a dual input shuffle then lower to VPERMV3. 
+ // If we have a dual input shuffle then lower to VPERMV3, + // (non-VLX will pad to 512-bit shuffles) if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros && ((Subtarget.hasAVX512() && - (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || - MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasVLX() && - (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 || - MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || - MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && - (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && - (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) { - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); + (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 || + MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 || + MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || + MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || + MaskVT == MVT::v16i32)) || + (Subtarget.hasBWI() && (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || + MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || + MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); - Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } @@ -40270,6 +40339,36 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); } + // fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y)) + // by forcing the unselected elements to zero. + // TODO: Can we handle more shuffles with this? 
+ if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() && + LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB && + LHS.hasOneUse() && RHS.hasOneUse()) { + MVT SimpleVT = VT.getSimpleVT(); + bool LHSUnary, RHSUnary; + SmallVector LHSOps, RHSOps; + SmallVector LHSMask, RHSMask, CondMask; + if (createShuffleMaskFromVSELECT(CondMask, Cond) && + getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask, + LHSUnary) && + getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask, + RHSUnary)) { + int NumElts = VT.getVectorNumElements(); + for (int i = 0; i != NumElts; ++i) { + if (CondMask[i] < NumElts) + RHSMask[i] = 0x80; + else + LHSMask[i] = 0x80; + } + LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0), + getConstVector(LHSMask, SimpleVT, DAG, DL, true)); + RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0), + getConstVector(RHSMask, SimpleVT, DAG, DL, true)); + return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); + } + } + // If we have SSE[12] support, try to form min/max nodes. SSE min/max // instructions match the semantics of the common C idiom x select(Cond, Y, X) - if (CondVT.getScalarType() != MVT::i1) + if (CondVT.getScalarType() != MVT::i1) { if (SDValue CondNot = IsNOT(Cond, DAG)) return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(CondVT, CondNot), RHS, LHS); + // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the signbit. + if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() && + ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) { + Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT, + DAG.getConstant(0, DL, CondVT), Cond.getOperand(0)); + return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS); + } + } // Try to optimize vXi1 selects if both operands are either all constants or // bitcasts from scalar integer type. 
In that case we can convert the operands diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 3c24f51ba36b1..5aac29e21d6f9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6712,7 +6712,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { "X86-64 PIC uses RIP relative addressing"); X86MachineFunctionInfo *X86FI = MF->getInfo(); - unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + Register GlobalBaseReg = X86FI->getGlobalBaseReg(); if (GlobalBaseReg != 0) return GlobalBaseReg; @@ -8268,7 +8268,7 @@ describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, // If the described register is a sub-register of the destination register, // then pick out the source register's corresponding sub-register. if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) { - unsigned SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx); + Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx); return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); } @@ -8532,7 +8532,7 @@ namespace { return false; X86MachineFunctionInfo *X86FI = MF.getInfo(); - unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + Register GlobalBaseReg = X86FI->getGlobalBaseReg(); // If we didn't need a GlobalBaseReg, don't insert code. 
if (GlobalBaseReg == 0) @@ -8545,7 +8545,7 @@ namespace { MachineRegisterInfo &RegInfo = MF.getRegInfo(); const X86InstrInfo *TII = STI.getInstrInfo(); - unsigned PC; + Register PC; if (STI.isPICStyleGOT()) PC = RegInfo.createVirtualRegister(&X86::GR32RegClass); else @@ -8615,7 +8615,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} +} // namespace char CGBR::ID = 0; FunctionPass* diff --git a/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/llvm/lib/Target/X86/X86TargetObjectFile.cpp index 2b48baccc01fc..b88ad5a478f39 100644 --- a/llvm/lib/Target/X86/X86TargetObjectFile.cpp +++ b/llvm/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,16 +7,8 @@ //===----------------------------------------------------------------------===// #include "X86TargetObjectFile.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Operator.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/X86/X86TargetObjectFile.h b/llvm/lib/Target/X86/X86TargetObjectFile.h index acea772eb036d..f4bf52c83771f 100644 --- a/llvm/lib/Target/X86/X86TargetObjectFile.h +++ b/llvm/lib/Target/X86/X86TargetObjectFile.h @@ -36,7 +36,7 @@ namespace llvm { MCStreamer &Streamer) const override; }; - /// This implemenatation is used for X86 ELF targets that don't + /// This implementation is used for X86 ELF targets that don't /// have a further specialization. 
class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF { public: diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 05d1465b3663b..f3053398cd5ae 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1990,12 +1990,13 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, return true; } + bool Changed = false; + // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); - bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. Changed = CleanupPointerRootUsers(GV, GetTLI); @@ -2021,11 +2022,14 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, // Don't actually mark a global constant if it's atomic because atomic loads // are implemented by a trivial cmpxchg in some edge-cases and that usually // requires write access to the variable even if it's not actually changed. - if (GS.Ordering == AtomicOrdering::NotAtomic) + if (GS.Ordering == AtomicOrdering::NotAtomic) { + assert(!GV->isConstant() && "Expected a non-constant global"); GV->setConstant(true); + Changed = true; + } // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); + Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -2085,7 +2089,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, } } - return false; + return Changed; } /// Analyze the specified global variable and optimize it if possible. 
If we diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8dfe42ebc27b5..3804a4bb79214 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1480,6 +1480,9 @@ struct AAICVTrackerFunction : public AAICVTracker { auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; Function *CalledFunction = CB->getCalledFunction(); + // Indirect call, assume ICV changes. + if (CalledFunction == nullptr) + return nullptr; if (CalledFunction == GetterRFI.Declaration) return None; if (CalledFunction == SetterRFI.Declaration) { diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 5c32e251588fd..50e87f0ab684f 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1020,13 +1020,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); + PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. // More loops are countable; try to optimize them. PM.add(createIndVarSimplifyPass()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 6812bedf26d1a..5ce32bc592d05 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1615,43 +1615,27 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, // this. 
bool Swapped = false; GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; + if (!isa(LHS) && isa(RHS)) { + std::swap(LHS, RHS); + Swapped = true; + } - // For now we require one side to be the base pointer "A" or a constant - // GEP derived from it. - if (GEPOperator *LHSGEP = dyn_cast(LHS)) { + // Require at least one GEP with a common base pointer on both sides. + if (auto *LHSGEP = dyn_cast(LHS)) { // (gep X, ...) - X if (LHSGEP->getOperand(0) == RHS) { GEP1 = LHSGEP; - Swapped = false; - } else if (GEPOperator *RHSGEP = dyn_cast(RHS)) { + } else if (auto *RHSGEP = dyn_cast(RHS)) { // (gep X, ...) - (gep X, ...) if (LHSGEP->getOperand(0)->stripPointerCasts() == - RHSGEP->getOperand(0)->stripPointerCasts()) { - GEP2 = RHSGEP; + RHSGEP->getOperand(0)->stripPointerCasts()) { GEP1 = LHSGEP; - Swapped = false; - } - } - } - - if (GEPOperator *RHSGEP = dyn_cast(RHS)) { - // X - (gep X, ...) - if (RHSGEP->getOperand(0) == LHS) { - GEP1 = RHSGEP; - Swapped = true; - } else if (GEPOperator *LHSGEP = dyn_cast(LHS)) { - // (gep X, ...) - (gep X, ...) - if (RHSGEP->getOperand(0)->stripPointerCasts() == - LHSGEP->getOperand(0)->stripPointerCasts()) { - GEP2 = LHSGEP; - GEP1 = RHSGEP; - Swapped = true; + GEP2 = RHSGEP; } } } if (!GEP1) - // No GEP found. return nullptr; if (GEP2) { @@ -1691,7 +1675,7 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, // pointer, subtract it from the offset we have. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder.CreateSub(Result, Offset); + Result = Builder.CreateSub(Result, Offset, "gepdiff"); } // If we have p - gep(p, ...) then we have to negate the result. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 995d0b6a8db71..40f6e9e147d76 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -428,6 +428,9 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) return IC.replaceOperand(II, 0, X); + + if (match(Op0, m_Intrinsic(m_Value(X)))) + return IC.replaceOperand(II, 0, X); } KnownBits Known = IC.computeKnownBits(Op0, 0, &II); @@ -622,7 +625,7 @@ Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { return nullptr; } -static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) { +static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap"); Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1); if (isa(Arg0) && !isa(Arg1)) { @@ -763,6 +766,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } + if (II->isCommutative()) { + if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) + return NewCall; + } + Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { case Intrinsic::objectsize: @@ -771,6 +779,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return nullptr; case Intrinsic::abs: { Value *IIOperand = II->getArgOperand(0); + bool IntMinIsPoison = cast(II->getArgOperand(1))->isOneValue(); + // abs(-x) -> abs(x) // TODO: Copy nsw if it was present on the neg? 
Value *X; @@ -781,6 +791,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) return replaceOperand(*II, 0, X); + if (Optional Imp = isImpliedByDomCondition( + ICmpInst::ICMP_SGE, IIOperand, + Constant::getNullValue(IIOperand->getType()), II, DL)) { + // abs(x) -> x if x >= 0 + if (*Imp) + return replaceInstUsesWith(*II, IIOperand); + + // abs(x) -> -x if x < 0 + if (IntMinIsPoison) + return BinaryOperator::CreateNSWNeg(IIOperand); + return BinaryOperator::CreateNeg(IIOperand); + } + break; } case Intrinsic::bswap: { @@ -905,8 +928,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: { - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) return I; @@ -934,10 +955,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; - LLVM_FALLTHROUGH; - case Intrinsic::usub_with_overflow: if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) return I; @@ -968,9 +985,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::uadd_sat: case Intrinsic::sadd_sat: - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; - LLVM_FALLTHROUGH; case Intrinsic::usub_sat: case Intrinsic::ssub_sat: { SaturatingInst *SI = cast(II); @@ -1051,8 +1065,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::maxnum: case Intrinsic::minimum: case Intrinsic::maximum: { - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); Value *X, *Y; @@ -1161,9 +1173,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { LLVM_FALLTHROUGH; } case 
Intrinsic::fma: { - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; - // fma fneg(x), fneg(y), z -> fma x, y, z Value *Src0 = II->getArgOperand(0); Value *Src1 = II->getArgOperand(1); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 42dfecd5ae727..946543692012e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2238,9 +2238,7 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, if (!VectorType::isValidElementType(DestType)) return nullptr; - unsigned NumElts = - cast(ExtElt->getVectorOperandType())->getNumElements(); - auto *NewVecType = FixedVectorType::get(DestType, NumElts); + auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType()); auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8860586c9aa19..608017b6dca25 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3088,6 +3088,13 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( Type *Ty = II->getType(); unsigned BitWidth = C.getBitWidth(); switch (II->getIntrinsicID()) { + case Intrinsic::abs: + // abs(A) == 0 -> A == 0 + if (C.isNullValue()) + return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), + Constant::getNullValue(Ty)); + break; + case Intrinsic::bswap: // bswap(A) == C -> A == bswap(C) return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), @@ -3976,6 +3983,19 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, ConstantExpr::getNeg(RHSC)); } + { + // Try to remove shared constant multiplier from equality comparison: + // X * C == Y * C 
(with no overflowing/aliasing) --> X == Y + Value *X, *Y; + const APInt *C; + if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 && + match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality()) + if (!C->countTrailingZeros() || + (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) || + (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap())) + return new ICmpInst(Pred, X, Y); + } + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -4052,10 +4072,6 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } - // If there are no trailing zeros in the multiplier, just eliminate - // the multiplies (no masking is needed): - // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y - return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } break; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a03cb5e470511..62ee7d00780ef 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -159,6 +159,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *visitFenceInst(FenceInst &FI); Instruction *visitSwitchInst(SwitchInst &SI); Instruction *visitReturnInst(ReturnInst &RI); + Instruction *visitUnreachableInst(UnreachableInst &I); Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI); Instruction *visitInsertValueInst(InsertValueInst &IV); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 26db91cc51129..245fd588a5231 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -275,6 +275,9 @@ Instruction 
*InstCombinerImpl::visitMul(BinaryOperator &I) { SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) return BinaryOperator::CreateMul(X, X); + + if (match(Op0, m_Intrinsic(m_Value(X)))) + return BinaryOperator::CreateMul(X, X); } // -X * C --> X * -C @@ -544,6 +547,21 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { return replaceInstUsesWith(I, Sqrt); } + // The following transforms are done irrespective of the number of uses + // for the expression "1.0/sqrt(X)". + // 1) 1.0/sqrt(X) * X -> X/sqrt(X) + // 2) X * 1.0/sqrt(X) -> X/sqrt(X) + // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it + // has the necessary (reassoc) fast-math-flags. + if (I.hasNoSignedZeros() && + match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Intrinsic(m_Value(X))) && Op1 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + if (I.hasNoSignedZeros() && + match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Intrinsic(m_Value(X))) && Op0 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + // Like the similar transform in instsimplify, this requires 'nsz' because // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0. if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 178e9a4a17bdd..0ca256860c596 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2798,6 +2798,19 @@ Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { return nullptr; } +Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { + // Try to remove the previous instruction if it must lead to unreachable. + // This includes instructions like stores and "llvm.assume" that may not get + // removed by simple dead code elimination. 
+ Instruction *Prev = I.getPrevNonDebugInstruction(); + if (Prev && !Prev->isEHPad() && + isGuaranteedToTransferExecutionToSuccessor(Prev)) { + eraseInstFromFunction(*Prev); + return &I; + } + return nullptr; +} + Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { assert(BI.isUnconditional() && "Only for unconditional branches."); diff --git a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp b/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp index 6372dfded82a7..5f8671d7d88fc 100644 --- a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp @@ -1,5 +1,4 @@ -//===- HeapProfiler.cpp - heap allocation and access profiler -//--------------===// +//===- HeapProfiler.cpp - heap allocation and access profiler -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -164,7 +163,8 @@ class HeapProfiler { /// If it is an interesting memory access, populate information /// about the access and return a InterestingMemoryAccess struct. /// Otherwise return None. - Optional isInterestingMemoryAccess(Instruction *I); + Optional + isInterestingMemoryAccess(Instruction *I) const; void instrumentMop(Instruction *I, const DataLayout &DL, InterestingMemoryAccess &Access); @@ -321,7 +321,7 @@ void HeapProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { } Optional -HeapProfiler::isInterestingMemoryAccess(Instruction *I) { +HeapProfiler::isInterestingMemoryAccess(Instruction *I) const { // Do not instrument the load fetching the dynamic shadow address. 
if (DynamicShadowOffset == I) return None; diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 7e4ef1ed9cd84..109e15d6d7cfc 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1828,29 +1828,42 @@ struct DSEState { MemoryAccess *Current = StartAccess; Instruction *KillingI = KillingDef->getMemoryInst(); bool StepAgain; - LLVM_DEBUG(dbgs() << " trying to get dominating access for " - << *StartAccess << "\n"); + LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. do { StepAgain = false; + LLVM_DEBUG({ + dbgs() << " visiting " << *Current; + if (!MSSA.isLiveOnEntryDef(Current) && isa(Current)) + dbgs() << " (" << *cast(Current)->getMemoryInst() + << ")"; + dbgs() << "\n"; + }); + // Reached TOP. - if (MSSA.isLiveOnEntryDef(Current)) + if (MSSA.isLiveOnEntryDef(Current)) { + LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n"); return None; + } // Cost of a step. Accesses in the same block are more likely to be valid // candidates for elimination, hence consider them cheaper. unsigned StepCost = KillingDef->getBlock() == Current->getBlock() ? MemorySSASameBBStepCost : MemorySSAOtherBBStepCost; - if (WalkerStepLimit <= StepCost) + if (WalkerStepLimit <= StepCost) { + LLVM_DEBUG(dbgs() << " ... hit walker step limit\n"); return None; + } WalkerStepLimit -= StepCost; // Return for MemoryPhis. They cannot be eliminated directly and the // caller is responsible for traversing them. - if (isa(Current)) + if (isa(Current)) { + LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); return Current; + } // Below, check if CurrentDef is a valid candidate to be eliminated by // KillingDef. If it is not, check the next candidate. 
@@ -2023,6 +2036,14 @@ struct DSEState { if (isMemTerminator(DefLoc, UseInst)) continue; + if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { + LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); + Cache.KnownReads.insert(UseAccess); + Cache.KnownReads.insert(StartAccess); + Cache.KnownReads.insert(EarlierAccess); + return None; + } + // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. if (isReadClobber(DefLoc, UseInst)) { diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index ff7596b19cb2b..c71038d66f995 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -36,6 +36,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -293,9 +295,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { // of their operands get the same value number by sorting the operand value // numbers. Since commutative operands are the 1st two operands it is more // efficient to sort by hand rather than using, say, std::sort. 
- assert(((isa(I) && I->getNumOperands() == 2) || - (isa(I) && I->getNumOperands() == 3)) - && "Unsupported commutative instruction!"); + assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); e.commutative = true; @@ -653,14 +653,18 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { auto *MemDep = isMemDepEnabled() ? &AM.getResult(F) : nullptr; auto *LI = AM.getCachedResult(F); + auto *MSSA = AM.getCachedResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE); + bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, + MSSA ? &MSSA->getMSSA() : nullptr); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); PA.preserve(); PA.preserve(); + if (MSSA) + PA.preserve(); if (LI) PA.preserve(); return PA; @@ -1335,6 +1339,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, LI->getAlign(), LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewLoad->setDebugLoc(LI->getDebugLoc()); + if (MSSAU) { + auto *MSSA = MSSAU->getMemorySSA(); + // Get the defining access of the original load or use the load if it is a + // MemoryDef (e.g. because it is volatile). The inserted loads are + // guaranteed to load from the same definition. + auto *LIAcc = MSSA->getMemoryAccess(LI); + auto *DefiningAcc = + isa(LIAcc) ? LIAcc : LIAcc->getDefiningAccess(); + auto *NewAccess = MSSAU->createMemoryAccessInBB( + NewLoad, DefiningAcc, NewLoad->getParent(), + MemorySSA::BeforeTerminator); + if (auto *NewDef = dyn_cast(NewAccess)) + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + else + MSSAU->insertUse(cast(NewAccess), /*RenameUses=*/true); + } // Transfer the old load's AA tags to the new load. 
AAMDNodes Tags; @@ -1551,9 +1571,17 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. FIXME: We could insert unreachable // instruction directly because we can modify the CFG. - new StoreInst(UndefValue::get(Int8Ty), - Constant::getNullValue(Int8Ty->getPointerTo()), - IntrinsicI); + auto *NewS = new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + IntrinsicI); + if (MSSAU) { + // This added store is to null, so it will never executed and we can + // just use the LiveOnEntry def as defining access. + auto *NewDef = MSSAU->createMemoryAccessInBB( + NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), NewS->getParent(), + MemorySSA::BeforeTerminator); + MSSAU->insertDef(cast(NewDef), /*RenameUses=*/true); + } } if (isAssumeWithEmptyBundle(*IntrinsicI)) markInstructionForDeletion(IntrinsicI); @@ -1687,6 +1715,8 @@ bool GVN::processLoad(LoadInst *L) { // Replace the load! 
patchAndReplaceAllUsesWith(L, AvailableValue); markInstructionForDeletion(L); + if (MSSAU) + MSSAU->removeMemoryAccess(L); ++NumGVNLoad; reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more @@ -1808,9 +1838,7 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, } if (Exp.commutative) { - assert((Exp.varargs.size() == 2 || - (Exp.opcode == Instruction::Call && Exp.varargs.size() == 3)) - && "Unsupported commutative instruction!"); + assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!"); if (Exp.varargs[0] > Exp.varargs[1]) { std::swap(Exp.varargs[0], Exp.varargs[1]); uint32_t Opcode = Exp.opcode >> 8; @@ -2204,7 +2232,7 @@ bool GVN::processInstruction(Instruction *I) { bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE) { + OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); @@ -2217,6 +2245,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, VN.setMemDep(MD); ORE = RunORE; InvalidBlockRPONumbers = true; + MemorySSAUpdater Updater(MSSA); + MSSAU = MSSA ? &Updater : nullptr; bool Changed = false; bool ShouldContinue = true; @@ -2227,7 +2257,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = &*FI++; - bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD); + bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD); if (removedBlock) ++NumGVNBlocks; @@ -2263,6 +2293,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // iteration. 
DeadBlocks.clear(); + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + return Changed; } @@ -2303,6 +2336,8 @@ bool GVN::processBlock(BasicBlock *BB) { salvageKnowledge(I, AC); salvageDebugInfo(*I); if (MD) MD->removeInstruction(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); LLVM_DEBUG(verifyRemoved(I)); ICF->removeInstruction(I); I->eraseFromParent(); @@ -2533,6 +2568,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); + if (MSSAU) + MSSAU->removeMemoryAccess(CurInst); LLVM_DEBUG(verifyRemoved(CurInst)); // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes // some assertion failures. @@ -2577,7 +2614,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { // possible. BasicBlock *BB = SplitCriticalEdge( Pred, Succ, - CriticalEdgeSplittingOptions(DT, LI).unsetPreserveLoopSimplify()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2592,7 +2629,7 @@ bool GVN::splitCriticalEdges() { do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(DT, LI)); + CriticalEdgeSplittingOptions(DT, LI, MSSAU)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2791,6 +2828,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { auto *LIWP = getAnalysisIfAvailable(); + auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), @@ -2800,7 +2838,8 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { ? &getAnalysis().getMemDep() : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, - &getAnalysis().getORE()); + &getAnalysis().getORE(), + MSSAWP ? 
&MSSAWP->getMSSA() : nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -2817,6 +2856,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); } private: diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp index 76e3f7859f08a..c11d2e4c1d6b9 100644 --- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -17,7 +17,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -131,10 +130,6 @@ FunctionPass *llvm::createInstSimplifyLegacyPass() { return new InstSimplifyLegacyPass(); } -void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createInstSimplifyLegacyPass()); -} - PreservedAnalyses InstSimplifyPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 1fd899aeebb36..9011e36619d94 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -23,6 +23,8 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -272,12 +274,13 @@ class MemCpyOptLegacyPass : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + 
AU.addPreserved(); } }; @@ -315,7 +318,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, MemsetRanges Ranges(DL); BasicBlock::iterator BI(StartInst); + + // Keeps track of the last memory use or def before the insertion point for + // the new memset. The new MemoryDef for the inserted memsets will be inserted + // after MemInsertPoint. It points to either LastMemDef or to the last user + // before the insertion point of the memset, if there are any such users. + MemoryUseOrDef *MemInsertPoint = nullptr; + // Keeps track of the last MemoryDef between StartInst and the insertion point + // for the new memset. This will become the defining access of the inserted + // memsets. + MemoryDef *LastMemDef = nullptr; for (++BI; !BI->isTerminator(); ++BI) { + if (MSSAU) { + auto *CurrentAcc = cast_or_null( + MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + if (CurrentAcc) { + MemInsertPoint = CurrentAcc; + if (auto *CurrentDef = dyn_cast(CurrentAcc)) + LastMemDef = CurrentDef; + } + } + if (!isa(BI) && !isa(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: @@ -394,15 +417,31 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, : Range.TheStores) dbgs() << *SI << '\n'; dbgs() << "With: " << *AMemSet << '\n'); - if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); + if (MSSAU) { + assert(LastMemDef && MemInsertPoint && + "Both LastMemDef and MemInsertPoint need to be set"); + auto *NewDef = + cast(MemInsertPoint->getMemoryInst() == &*BI + ? MSSAU->createMemoryAccessBefore( + AMemSet, LastMemDef, MemInsertPoint) + : MSSAU->createMemoryAccessAfter( + AMemSet, LastMemDef, MemInsertPoint)); + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + LastMemDef = NewDef; + MemInsertPoint = NewDef; + } + // Zap all the stores. 
for (Instruction *SI : Range.TheStores) { + if (MSSAU) + MSSAU->removeMemoryAccess(SI); MD->removeInstruction(SI); SI->eraseFromParent(); } + ++NumMemSetInfer; } @@ -573,6 +612,17 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(P))); + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(P)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(SI); + MSSAU->removeMemoryAccess(LI); + } + MD->removeInstruction(SI); SI->eraseFromParent(); MD->removeInstruction(LI); @@ -621,6 +671,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { DL.getTypeStoreSize(SI->getOperand(0)->getType()), commonAlignment(SI->getAlign(), LI->getAlign()), C); if (changed) { + if (MSSAU) { + MSSAU->removeMemoryAccess(SI); + MSSAU->removeMemoryAccess(LI); + } + MD->removeInstruction(SI); SI->eraseFromParent(); MD->removeInstruction(LI); @@ -658,6 +713,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(SI))); + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(SI); + } + MD->removeInstruction(SI); SI->eraseFromParent(); NumMemSetInfer++; @@ -939,14 +1003,23 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. 
IRBuilder<> Builder(M); + Instruction *NewM; if (UseMemMove) - Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); else - Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); + + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(M))); + auto *LastDef = cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(M); + } // Remove the instruction we're replacing. MD->removeInstruction(M); @@ -1012,11 +1085,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); Value *MemsetLen = Builder.CreateSelect( Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); - Builder.CreateMemSet( + Instruction *NewMemSet = Builder.CreateMemSet( Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest, SrcSize), MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + "MemCpy must be a MemoryDef"); + // The new memset is inserted after the memcpy, but it is known that its + // defining access is the memset about to be removed which immediately + // precedes the memcpy. 
+ auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + NewMemSet, LastDef->getDefiningAccess(), LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(MemSet); + } + MD->removeInstruction(MemSet); MemSet->eraseFromParent(); return true; @@ -1081,8 +1168,16 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, } IRBuilder<> Builder(MemCpy); - Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize, - MaybeAlign(MemCpy->getDestAlignment())); + Instruction *NewM = + Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), + CopySize, MaybeAlign(MemCpy->getDestAlignment())); + if (MSSAU) { + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + } + return true; } @@ -1098,6 +1193,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // If the source and destination of the memcpy are the same, then zap it. 
if (M->getSource() == M->getDest()) { ++BBI; + if (MSSAU) + MSSAU->removeMemoryAccess(M); + MD->removeInstruction(M); M->eraseFromParent(); return true; @@ -1109,8 +1207,18 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), M->getModule()->getDataLayout())) { IRBuilder<> Builder(M); - Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - MaybeAlign(M->getDestAlignment()), false); + Instruction *NewM = + Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), + MaybeAlign(M->getDestAlignment()), false); + if (MSSAU) { + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(M); + } + MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1145,6 +1253,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { M->getSourceAlign().valueOrOne()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), CopySize->getZExtValue(), Alignment, C)) { + if (MSSAU) + MSSAU->removeMemoryAccess(M); + MD->removeInstruction(M); M->eraseFromParent(); return true; @@ -1161,6 +1272,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { return processMemCpyMemCpyDependence(M, MDep); } else if (SrcDepInfo.isDef()) { if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { + if (MSSAU) + MSSAU->removeMemoryAccess(M); + MD->removeInstruction(M); M->eraseFromParent(); ++NumMemCpyInstr; @@ -1171,6 +1285,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (SrcDepInfo.isClobber()) if (MemSetInst *MDep = dyn_cast(SrcDepInfo.getInst())) if (performMemCpyToMemSetOptzn(M, MDep)) { + if (MSSAU) + MSSAU->removeMemoryAccess(M); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1201,6 +1317,9 @@ bool 
MemCpyOptPass::processMemMove(MemMoveInst *M) { M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys)); + // For MemorySSA nothing really changes (except that memcpy may imply stricter + // aliasing guarantees). + // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. MD->removeInstruction(M); @@ -1338,8 +1457,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { auto *AA = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); + auto *MSSA = AM.getCachedResult(F); - bool MadeChange = runImpl(F, &MD, &TLI, AA, AC, DT); + bool MadeChange = + runImpl(F, &MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); if (!MadeChange) return PreservedAnalyses::all(); @@ -1347,18 +1468,23 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserveSet(); PA.preserve(); PA.preserve(); + if (MSSA) + PA.preserve(); return PA; } bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_) { + AssumptionCache *AC_, DominatorTree *DT_, + MemorySSA *MSSA_) { bool MadeChange = false; MD = MD_; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; + MemorySSAUpdater MSSAU_(MSSA_); + MSSAU = MSSA_ ? &MSSAU_ : nullptr; // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. 
@@ -1371,6 +1497,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, MadeChange = true; } + if (MSSA_ && VerifyMemorySSA) + MSSA_->verifyMemorySSA(); + MD = nullptr; return MadeChange; } @@ -1385,6 +1514,8 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { auto *AA = &getAnalysis().getAAResults(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *DT = &getAnalysis().getDomTree(); + auto *MSSAWP = getAnalysisIfAvailable(); - return Impl.runImpl(F, MD, TLI, AA, AC, DT); + return Impl.runImpl(F, MD, TLI, AA, AC, DT, + MSSAWP ? &MSSAWP->getMSSA() : nullptr); } diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 57befc9c3cfb3..2afc778ed8214 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -2112,9 +2113,27 @@ bool llvm::runIPSCCP( } // Zap all returns which we've identified as zap to change. + SmallSetVector FuncZappedReturn; for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) { Function *F = ReturnsToZap[i]->getParent()->getParent(); ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); + // Record all functions that are zapped. + FuncZappedReturn.insert(F); + } + + // Remove the returned attribute for zapped functions and the + // corresponding call sites. + for (Function *F : FuncZappedReturn) { + for (Argument &A : F->args()) + F->removeParamAttr(A.getArgNo(), Attribute::Returned); + for (Use &U : F->uses()) { + // Skip over blockaddr users. 
+ if (isa(U.getUser())) + continue; + CallBase *CB = cast(U.getUser()); + for (Use &Arg : CB->args()) + CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned); + } } // If we inferred constant or undef values for globals variables, we can diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 55b9dd7482cc3..f4dc6f2996b98 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -166,6 +166,10 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIndVarSimplifyPass()); } +void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createInstSimplifyLegacyPass()); +} + void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createJumpThreadingPass()); } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 2b0ae722458b3..41349457e2b95 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -182,6 +182,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, TheOnlyDest = SI->case_begin()->getCaseSuccessor(); } + bool Changed = false; + // Figure out which case it goes to. for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { // Found case matching a constant operand? 
@@ -220,6 +222,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); + Changed = true; if (DTU) DTU->applyUpdatesPermissive( {{DominatorTree::Delete, ParentBB, DefaultDest}}); @@ -308,7 +311,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); return true; } - return false; + return Changed; } if (auto *IBI = dyn_cast(T)) { diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 1e8b11d6ac5fe..1bb827cd3057b 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2177,13 +2177,133 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, return None; } +template static int costAndCollectOperands( + const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, + TargetTransformInfo::TargetCostKind CostKind, + SmallVectorImpl &Worklist) { + + const T *S = cast(WorkItem.S); + int Cost = 0; + // Collect the opcodes of all the instructions that will be needed to expand + // the SCEVExpr. This is so that when we come to cost the operands, we know + // what the generated user(s) will be. 
+ SmallVector Opcodes; + + auto CastCost = [&](unsigned Opcode) { + Opcodes.push_back(Opcode); + return TTI.getCastInstrCost(Opcode, S->getType(), + S->getOperand(0)->getType(), + TTI::CastContextHint::None, CostKind); + }; + + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired) { + Opcodes.push_back(Opcode); + return NumRequired * + TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); + }; + + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired) { + Opcodes.push_back(Opcode); + Type *OpType = S->getOperand(0)->getType(); + return NumRequired * + TTI.getCmpSelInstrCost(Opcode, OpType, + CmpInst::makeCmpResultType(OpType), CostKind); + }; + + switch (S->getSCEVType()) { + default: + llvm_unreachable("No other scev expressions possible."); + case scUnknown: + case scConstant: + return 0; + case scTruncate: + Cost = CastCost(Instruction::Trunc); + break; + case scZeroExtend: + Cost = CastCost(Instruction::ZExt); + break; + case scSignExtend: + Cost = CastCost(Instruction::SExt); + break; + case scUDivExpr: { + unsigned Opcode = Instruction::UDiv; + if (auto *SC = dyn_cast(S->getOperand(1))) + if (SC->getAPInt().isPowerOf2()) + Opcode = Instruction::LShr; + Cost = ArithCost(Opcode, 1); + break; + } + case scAddExpr: + Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); + break; + case scMulExpr: + // TODO: this is a very pessimistic cost modelling for Mul, + // because of Bin Pow algorithm actually used by the expander, + // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). + Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1); + Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1); + break; + } + case scAddRecExpr: { + // In this polynominal, we may have some zero operands, and we shouldn't + // really charge for those. So how many non-zero coeffients are there? 
+ int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { + return !Op->isZero(); + }); + + assert(NumTerms >= 1 && "Polynominal should have at least one term."); + assert(!(*std::prev(S->operands().end()))->isZero() && + "Last operand should not be zero"); + + // Ignoring constant term (operand 0), how many of the coeffients are u> 1? + int NumNonZeroDegreeNonOneTerms = + llvm::count_if(S->operands(), [](const SCEV *Op) { + auto *SConst = dyn_cast(Op); + return !SConst || SConst->getAPInt().ugt(1); + }); + + // Much like with normal add expr, the polynominal will require + // one less addition than the number of it's terms. + int AddCost = ArithCost(Instruction::Add, NumTerms - 1); + // Here, *each* one of those will require a multiplication. + int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + Cost = AddCost + MulCost; + + // What is the degree of this polynominal? + int PolyDegree = S->getNumOperands() - 1; + assert(PolyDegree >= 1 && "Should be at least affine."); + + // The final term will be: + // Op_{PolyDegree} * x ^ {PolyDegree} + // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. + // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for + // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. + // FIXME: this is conservatively correct, but might be overly pessimistic. 
+ Cost += MulCost * (PolyDegree - 1); + } + } + + for (unsigned Opc : Opcodes) + for (auto I : enumerate(S->operands())) + Worklist.emplace_back(Opc, I.index(), I.value()); + return Cost; +} + bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining, - const TargetTransformInfo &TTI, SmallPtrSetImpl &Processed, - SmallVectorImpl &Worklist) { + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl &Processed, + SmallVectorImpl &Worklist) { if (BudgetRemaining < 0) return true; // Already run out of budget, give up. + const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? if (!Processed.insert(S).second) return false; // We have already accounted for this expression. @@ -2202,44 +2322,12 @@ bool SCEVExpander::isHighCostExpansionHelper( TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_RecipThroughput; - if (auto *CastExpr = dyn_cast(S)) { - unsigned Opcode; - switch (S->getSCEVType()) { - case scTruncate: - Opcode = Instruction::Trunc; - break; - case scZeroExtend: - Opcode = Instruction::ZExt; - break; - case scSignExtend: - Opcode = Instruction::SExt; - break; - default: - llvm_unreachable("There are no other cast types."); - } - const SCEV *Op = CastExpr->getOperand(); - BudgetRemaining -= TTI.getCastInstrCost( - Opcode, /*Dst=*/S->getType(), - /*Src=*/Op->getType(), TTI::CastContextHint::None, CostKind); - Worklist.emplace_back(Op); + if (isa(S)) { + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. - } - - if (auto *UDivExpr = dyn_cast(S)) { - // If the divisor is a power of two count this as a logical right-shift. 
- if (auto *SC = dyn_cast(UDivExpr->getRHS())) { - if (SC->getAPInt().isPowerOf2()) { - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(), - CostKind); - // Note that we don't count the cost of RHS, because it is a constant, - // and we consider those to be free. But if that changes, we would need - // to log2() it first before calling isHighCostExpansionHelper(). - Worklist.emplace_back(UDivExpr->getLHS()); - return false; // Will answer upon next entry into this function. - } - } - + } else if (isa(S)) { // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some @@ -2252,117 +2340,28 @@ bool SCEVExpander::isHighCostExpansionHelper( SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); // Need to count the cost of this UDiv. - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(), - CostKind); - Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()}); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. - } - - if (const auto *NAry = dyn_cast(S)) { - Type *OpType = NAry->getType(); - - assert(NAry->getNumOperands() >= 2 && - "Polynomial should be at least linear"); - - int AddCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - int MulCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - - // In this polynominal, we may have some zero operands, and we shouldn't - // really charge for those. So how many non-zero coeffients are there? 
- int NumTerms = llvm::count_if(NAry->operands(), - [](const SCEV *S) { return !S->isZero(); }); - assert(NumTerms >= 1 && "Polynominal should have at least one term."); - assert(!(*std::prev(NAry->operands().end()))->isZero() && - "Last operand should not be zero"); - - // Much like with normal add expr, the polynominal will require - // one less addition than the number of it's terms. - BudgetRemaining -= AddCost * (NumTerms - 1); - if (BudgetRemaining < 0) - return true; - - // Ignoring constant term (operand 0), how many of the coeffients are u> 1? - int NumNonZeroDegreeNonOneTerms = - llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()), - [](const SCEV *S) { - auto *SConst = dyn_cast(S); - return !SConst || SConst->getAPInt().ugt(1); - }); - // Here, *each* one of those will require a multiplication. - BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms; - if (BudgetRemaining < 0) - return true; - - // What is the degree of this polynominal? - int PolyDegree = NAry->getNumOperands() - 1; - assert(PolyDegree >= 1 && "Should be at least affine."); - - // The final term will be: - // Op_{PolyDegree} * x ^ {PolyDegree} - // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. - // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for - // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. - // FIXME: this is conservatively correct, but might be overly pessimistic. - BudgetRemaining -= MulCost * (PolyDegree - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? 
- } - - if (const SCEVNAryExpr *NAry = dyn_cast(S)) { - Type *OpType = NAry->getType(); - - int PairCost; - switch (S->getSCEVType()) { - case scAddExpr: - PairCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - break; - case scMulExpr: - // TODO: this is a very pessimistic cost modelling for Mul, - // because of Bin Pow algorithm actually used by the expander, - // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). - PairCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - break; - case scSMaxExpr: - case scUMaxExpr: - case scSMinExpr: - case scUMinExpr: - PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind) + - TTI.getCmpSelInstrCost(Instruction::Select, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind); - break; - default: - llvm_unreachable("There are no other variants here."); - } - + } else if (const SCEVNAryExpr *NAry = dyn_cast(S)) { assert(NAry->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) // than the number of it's terms. - BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? 
- } - - llvm_unreachable("No other scev expressions possible."); + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; + return BudgetRemaining < 0; + } else if (const auto *NAry = dyn_cast(S)) { + assert(NAry->getNumOperands() >= 2 && + "Polynomial should be at least linear"); + BudgetRemaining -= costAndCollectOperands( + WorkItem, TTI, CostKind, Worklist); + return BudgetRemaining < 0; + } else + llvm_unreachable("No other scev expressions possible."); } Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ae2471969160c..124a7c423e72c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -143,6 +143,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); +// Two is chosen to allow one negation and a logical combine. +static cl::opt + BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, + cl::init(2), + cl::desc("Maximum cost of combining conditions when " + "folding branches")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -2684,12 +2691,16 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); const unsigned PredCount = pred_size(BB); bool Changed = false; + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() ? 
TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; Instruction *Cond = nullptr; if (BI->isConditional()) @@ -2818,6 +2829,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, continue; } + // Check the cost of inserting the necessary logic before performing the + // transformation. + if (TTI && Opc != Instruction::BinaryOpsEnd) { + Type *Ty = BI->getCondition()->getType(); + unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || + !isa(PBI->getCondition()))) + Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); + + if (Cost > BranchFoldThreshold) + continue; + } + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); Changed = true; @@ -6013,7 +6037,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; } @@ -6076,7 +6100,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. 
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 078b2ba1c70ac..9c9e2ec8222d1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -51,14 +51,12 @@ namespace llvm { class BasicBlock; class DominatorTree; class InnerLoopVectorizer; -template class InterleaveGroup; class LoopInfo; class raw_ostream; class RecurrenceDescriptor; class Value; class VPBasicBlock; class VPRegionBlock; -class VPSlotTracker; class VPlan; class VPlanSlp; diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 1cc0e40da3a2b..29e9b92040d43 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -100,36 +100,36 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { Type *ScalarTy = Scalar->getType(); if (!Load || !Load->isSimple()) return false; + auto *Ty = dyn_cast(I.getType()); + if (!Ty) + return false; // TODO: Extend this to match GEP with constant offsets. Value *PtrOp = Load->getPointerOperand()->stripPointerCasts(); assert(isa(PtrOp->getType()) && "Expected a pointer type"); - unsigned VectorSize = TTI.getMinVectorRegisterBitWidth(); + unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); - if (!ScalarSize || !VectorSize || VectorSize % ScalarSize != 0) + if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0) return false; // Check safety of replacing the scalar load with a larger vector load. 
- unsigned VecNumElts = VectorSize / ScalarSize; - auto *VectorTy = VectorType::get(ScalarTy, VecNumElts, false); - // TODO: Allow insert/extract subvector if the type does not match. - if (VectorTy != I.getType()) - return false; + unsigned MinVecNumElts = MinVectorSize / ScalarSize; + auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); Align Alignment = Load->getAlign(); const DataLayout &DL = I.getModule()->getDataLayout(); - if (!isSafeToLoadUnconditionally(PtrOp, VectorTy, Alignment, DL, Load, &DT)) + if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT)) return false; unsigned AS = Load->getPointerAddressSpace(); // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0 int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS); - APInt DemandedElts = APInt::getOneBitSet(VecNumElts, 0); - OldCost += TTI.getScalarizationOverhead(VectorTy, DemandedElts, true, false); + APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); + OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts, true, false); // New pattern: load VecPtr - int NewCost = TTI.getMemoryOpCost(Instruction::Load, VectorTy, Alignment, AS); + int NewCost = TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS); // We can aggressively convert to the vector form because the backend can // invert this transform if it does not result in a performance win. 
@@ -139,8 +139,18 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // It is safe and potentially profitable to load a vector directly: // inselt undef, load Scalar, 0 --> load VecPtr IRBuilder<> Builder(Load); - Value *CastedPtr = Builder.CreateBitCast(PtrOp, VectorTy->getPointerTo(AS)); - LoadInst *VecLd = Builder.CreateAlignedLoad(VectorTy, CastedPtr, Alignment); + Value *CastedPtr = Builder.CreateBitCast(PtrOp, MinVecTy->getPointerTo(AS)); + Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment); + + // If the insert type does not match the target's minimum vector type, + // use an identity shuffle to shrink/grow the vector. + if (Ty != MinVecTy) { + unsigned OutputNumElts = Ty->getNumElements(); + SmallVector Mask(OutputNumElts, UndefMaskElem); + for (unsigned i = 0; i < OutputNumElts && i < MinVecNumElts; ++i) + Mask[i] = i; + VecLd = Builder.CreateShuffleVector(VecLd, UndefValue::get(MinVecTy), Mask); + } replaceValue(I, *VecLd); ++NumVecLoad; return true; @@ -424,11 +434,14 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask)))))) return false; - // Disallow non-vector casts and length-changing shuffles. + // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for + // scalable type is unknown; Second, we cannot reason if the narrowed shuffle + // mask for scalable type is a splat or not. + // 2) Disallow non-vector casts and length-changing shuffles. // TODO: We could allow any shuffle. - auto *DestTy = dyn_cast(I.getType()); - auto *SrcTy = cast(V->getType()); - if (!DestTy || I.getOperand(0)->getType() != SrcTy) + auto *DestTy = dyn_cast(I.getType()); + auto *SrcTy = dyn_cast(V->getType()); + if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy) return false; // The new shuffle must not cost more than the old shuffle. 
The bitcast is @@ -437,10 +450,8 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy)) return false; - // FIXME: it should be possible to implement the computation of the widened - // shuffle mask in terms of ElementCount to work with scalable shuffles. - unsigned DestNumElts = cast(DestTy)->getNumElements(); - unsigned SrcNumElts = cast(SrcTy)->getNumElements(); + unsigned DestNumElts = DestTy->getNumElements(); + unsigned SrcNumElts = SrcTy->getNumElements(); SmallVector NewMask; if (SrcNumElts <= DestNumElts) { // The bitcast is from wide to narrow/equal elements. The shuffle mask can diff --git a/llvm/test/Analysis/BasicAA/assume.ll b/llvm/test/Analysis/BasicAA/assume.ll index 1a7de5aa6afef..fe83a8c3df0e3 100644 --- a/llvm/test/Analysis/BasicAA/assume.ll +++ b/llvm/test/Analysis/BasicAA/assume.ll @@ -14,8 +14,8 @@ define void @test1(i8* %P, i8* %Q) nounwind ssp { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true) ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.assume(i1 true) } diff --git a/llvm/test/Analysis/BasicAA/cs-cs.ll b/llvm/test/Analysis/BasicAA/cs-cs.ll index fa7a5f972aafe..49eedd4279374 100644 --- 
a/llvm/test/Analysis/BasicAA/cs-cs.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs.ll @@ -17,12 +17,12 @@ define void @test2(i8* %P, i8* %Q) #3 { ; CHECK-LABEL: Function: test2: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2_atomic(i8* %P, i8* %Q) #3 { @@ -33,12 +33,12 @@ define void @test2_atomic(i8* %P, i8* %Q) #3 { ; CHECK-LABEL: Function: test2_atomic: ; CHECK: MayAlias: 
i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Mod: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Mod: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: 
Both ModRef: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) } define void @test2a(i8* noalias %P, i8* noalias %Q) #3 { @@ -149,12 +149,12 @@ define void @test3(i8* %P, i8* %Q) #3 { ; CHECK-LABEL: Function: test3: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test3a(i8* noalias %P, i8* noalias %Q) #3 { @@ -199,14 +199,14 @@ define void @test5(i8* %P, i8* %Q, i8* %R) #3 { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: MayAlias: i8* %P, i8* %R ; CHECK: MayAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test5a(i8* noalias %P, i8* noalias %Q, i8* noalias %R) nounwind ssp { diff --git a/llvm/test/Analysis/BasicAA/guards.ll b/llvm/test/Analysis/BasicAA/guards.ll index e6cee5f6d1e44..66293503ed4ac 100644 --- a/llvm/test/Analysis/BasicAA/guards.ll +++ b/llvm/test/Analysis/BasicAA/guards.ll @@ -14,8 +14,8 @@ define void @test1(i8* %P, i8* %Q) { ; CHECK: Just Ref: Ptr: i8* %P <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void (i1, ...) 
@llvm.experimental.guard(i1 true) [ "deopt"() ] } diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll index 8513cefe5c119..a1a0d11cac2ec 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -63,9 +63,9 @@ define void @i1() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i1 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %j = or i1 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i1 undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %c = add i1 undef, undef diff --git a/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll b/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll index 83a5544768475..678b652c61103 100644 --- a/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll +++ b/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll @@ -6,7 +6,7 @@ define void @source_clobber(i8* %a, i8* %b) { ; CHECK-LABEL: @source_clobber( ; CHECK-NEXT: ; 1 = MemoryDef(liveOnEntry) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false) -; CHECK-NEXT: ; MemoryUse(liveOnEntry) +; CHECK-NEXT: ; MemoryUse(1) MayAlias ; CHECK-NEXT: [[X:%.*]] = load i8, i8* 
%b ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Analysis/MemorySSA/pr41853.ll b/llvm/test/Analysis/MemorySSA/pr41853.ll index 6dbc9d7826214..f7bf21c9f90f2 100644 --- a/llvm/test/Analysis/MemorySSA/pr41853.ll +++ b/llvm/test/Analysis/MemorySSA/pr41853.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -memoryssa -loop-simplify -early-cse-memssa -verify-memoryssa %s | FileCheck %s +; RUN: opt -S -memoryssa -loop-simplify -early-cse-memssa -earlycse-debug-hash -verify-memoryssa %s | FileCheck %s ; RUN: opt -S -memoryssa -loop-simplify -early-cse-memssa -enable-mssa-loop-dependency -verify-memoryssa %s | FileCheck %s ; REQUIRES: asserts target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll b/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll new file mode 100644 index 0000000000000..86ba0b7b658ef --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s + +declare i32 @llvm.umax.i32(i32, i32) +declare i32 @llvm.umin.i32(i32, i32) +declare i32 @llvm.smax.i32(i32, i32) +declare i32 @llvm.smin.i32(i32, i32) + +define i32 @umax(i32 %x, i32 %y) { +; CHECK-LABEL: 'umax' +; CHECK-NEXT: Classifying expressions for: @umax +; CHECK-NEXT: %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> (%x umax %y) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @umax +; + %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @umin(i32 %x, i32 %y) { +; CHECK-LABEL: 'umin' +; CHECK-NEXT: Classifying expressions for: @umin +; CHECK-NEXT: %z = call i32 @llvm.umin.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> (%x umin %y) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @umin +; + %z = call i32 
@llvm.umin.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @smax(i32 %x, i32 %y) { +; CHECK-LABEL: 'smax' +; CHECK-NEXT: Classifying expressions for: @smax +; CHECK-NEXT: %z = call i32 @llvm.smax.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> (%x smax %y) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @smax +; + %z = call i32 @llvm.smax.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @smin(i32 %x, i32 %y) { +; CHECK-LABEL: 'smin' +; CHECK-NEXT: Classifying expressions for: @smin +; CHECK-NEXT: %z = call i32 @llvm.smin.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> (%x smin %y) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @smin +; + %z = call i32 @llvm.smin.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @clamp(i32 %x) { +; CHECK-LABEL: 'clamp' +; CHECK-NEXT: Classifying expressions for: @clamp +; CHECK-NEXT: %y = call i32 @llvm.umax.i32(i32 %x, i32 10) +; CHECK-NEXT: --> (10 umax %x) U: [10,0) S: [10,0) +; CHECK-NEXT: %z = call i32 @llvm.umin.i32(i32 %y, i32 20) +; CHECK-NEXT: --> (20 umin (10 umax %x)) U: [10,21) S: [10,21) +; CHECK-NEXT: Determining loop execution counts for: @clamp +; + %y = call i32 @llvm.umax.i32(i32 %x, i32 10) + %z = call i32 @llvm.umin.i32(i32 %y, i32 20) + ret i32 %z +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir index 3356206c4cfca..93f8e4284cd4c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir @@ -164,3 +164,121 @@ body: | $q0 = COPY %5(<4 x s32>) RET_ReallyLR implicit $q0 ... 
+--- +name: icmp_and_icmp +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_and_icmp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[COPY]](s64), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %5:_(s1) = G_AND %3, %4 + %6:_(s1) = G_XOR %5, %2 + %7:_(s32) = G_ANYEXT %6 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 +... +--- +name: icmp_or_icmp +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_or_icmp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[COPY]](s64), [[C]] + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %5:_(s1) = G_OR %3, %4 + %6:_(s1) = G_XOR %5, %2 + %7:_(s32) = G_ANYEXT %6 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: icmp_and_icmp_or_icmp +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_and_icmp_or_icmp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[COPY]](s64), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[OR]], [[ICMP2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %5:_(s1) = G_AND %3, %4 + %6:_(s1) = G_ICMP intpred(ne), %0(s64), %1 + %7:_(s1) = G_OR %5, %6 + %8:_(s1) = G_XOR %7, %2 + %9:_(s32) = G_ANYEXT %8 + $w0 = COPY %9(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: icmp_and_trunc +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_and_trunc + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[TRUNC]] + ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[AND]], [[C1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_TRUNC %0(s64) + %5:_(s1) = G_AND %3, %4 + %6:_(s1) = G_XOR %5, %2 + %7:_(s32) = G_ANYEXT %6 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 +... diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll index 3f8fa3e9e3837..b4c6e7736837a 100644 --- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll +++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll @@ -207,9 +207,8 @@ define void @le_i32_to_i16(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w0, [x1] -; BE-NEXT: strh w8, [x1, #2] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -228,9 +227,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16_order: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w8, [x1, #2] -; BE-NEXT: strh w0, [x1] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -244,9 +242,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; 
LE-LABEL: be_i32_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w0, [x1, #2] -; LE-NEXT: strh w8, [x1] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16: @@ -265,9 +262,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16_order: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w8, [x1] -; LE-NEXT: strh w0, [x1, #2] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16_order: @@ -528,13 +524,12 @@ define void @le_i64_to_i16_order(i64 %x, i16* %p0) { define void @be_i64_to_i16(i64 %x, i16* %p0) { ; LE-LABEL: be_i64_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #16 -; LE-NEXT: lsr x9, x0, #32 -; LE-NEXT: lsr x10, x0, #48 -; LE-NEXT: strh w0, [x1, #6] -; LE-NEXT: strh w8, [x1, #4] -; LE-NEXT: strh w9, [x1, #2] -; LE-NEXT: strh w10, [x1] +; LE-NEXT: lsr x8, x0, #32 +; LE-NEXT: lsr x9, x0, #48 +; LE-NEXT: ror w10, w0, #16 +; LE-NEXT: str w10, [x1, #4] +; LE-NEXT: strh w8, [x1, #2] +; LE-NEXT: strh w9, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i16: @@ -599,8 +594,8 @@ define void @le_i64_to_i32(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -619,8 +614,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32_order: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -634,8 +629,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; 
LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32: @@ -654,8 +649,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32_order: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32_order: diff --git a/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll new file mode 100644 index 0000000000000..0a31271d3f8be --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll @@ -0,0 +1,485 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; FCEIL + +define @frintp_nxv8f16( %a) { +; CHECK-LABEL: frintp_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv8f16( %a) + ret %res +} + +define @frintp_nxv4f16( %a) { +; CHECK-LABEL: frintp_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv4f16( %a) + ret %res +} + +define @frintp_nxv2f16( %a) { +; CHECK-LABEL: frintp_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f16( %a) + ret %res +} + +define @frintp_nxv4f32( %a) { +; CHECK-LABEL: frintp_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintp z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv4f32( %a) + ret %res +} + +define @frintp_nxv2f32( %a) { +; CHECK-LABEL: frintp_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintp z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f32( %a) + ret %res +} + +define @frintp_nxv2f64( %a) { +; CHECK-LABEL: frintp_nxv2f64: +; 
CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintp z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f64( %a) + ret %res +} + +; FFLOOR + +define @frintm_nxv8f16( %a) { +; CHECK-LABEL: frintm_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintm z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv8f16( %a) + ret %res +} + +define @frintm_nxv4f16( %a) { +; CHECK-LABEL: frintm_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintm z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv4f16( %a) + ret %res +} + +define @frintm_nxv2f16( %a) { +; CHECK-LABEL: frintm_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintm z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv2f16( %a) + ret %res +} + +define @frintm_nxv4f32( %a) { +; CHECK-LABEL: frintm_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintm z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv4f32( %a) + ret %res +} + +define @frintm_nxv2f32( %a) { +; CHECK-LABEL: frintm_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintm z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv2f32( %a) + ret %res +} + +define @frintm_nxv2f64( %a) { +; CHECK-LABEL: frintm_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintm z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv2f64( %a) + ret %res +} + +; FNEARBYINT + +define @frinti_nxv8f16( %a) { +; CHECK-LABEL: frinti_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frinti z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv8f16( %a) + ret %res +} + +define @frinti_nxv4f16( %a) { +; CHECK-LABEL: frinti_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinti z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv4f16( %a) + ret %res +} + +define @frinti_nxv2f16( %a) { 
+; CHECK-LABEL: frinti_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinti z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv2f16( %a) + ret %res +} + +define @frinti_nxv4f32( %a) { +; CHECK-LABEL: frinti_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinti z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv4f32( %a) + ret %res +} + +define @frinti_nxv2f32( %a) { +; CHECK-LABEL: frinti_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinti z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv2f32( %a) + ret %res +} + +define @frinti_nxv2f64( %a) { +; CHECK-LABEL: frinti_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinti z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv2f64( %a) + ret %res +} + +; FRINT + +define @frintx_nxv8f16( %a) { +; CHECK-LABEL: frintx_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv8f16( %a) + ret %res +} + +define @frintx_nxv4f16( %a) { +; CHECK-LABEL: frintx_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv4f16( %a) + ret %res +} + +define @frintx_nxv2f16( %a) { +; CHECK-LABEL: frintx_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv2f16( %a) + ret %res +} + +define @frintx_nxv4f32( %a) { +; CHECK-LABEL: frintx_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv4f32( %a) + ret %res +} + +define @frintx_nxv2f32( %a) { +; CHECK-LABEL: frintx_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv2f32( %a) + ret %res +} + +define 
@frintx_nxv2f64( %a) { +; CHECK-LABEL: frintx_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv2f64( %a) + ret %res +} + +; ROUND + +define @frinta_nxv8f16( %a) { +; CHECK-LABEL: frinta_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frinta z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.round.nxv8f16( %a) + ret %res +} + +define @frinta_nxv4f16( %a) { +; CHECK-LABEL: frinta_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinta z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.round.nxv4f16( %a) + ret %res +} + +define @frinta_nxv2f16( %a) { +; CHECK-LABEL: frinta_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinta z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.round.nxv2f16( %a) + ret %res +} + +define @frinta_nxv4f32( %a) { +; CHECK-LABEL: frinta_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinta z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.round.nxv4f32( %a) + ret %res +} + +define @frinta_nxv2f32( %a) { +; CHECK-LABEL: frinta_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinta z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.round.nxv2f32( %a) + ret %res +} + +define @frinta_nxv2f64( %a) { +; CHECK-LABEL: frinta_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinta z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.round.nxv2f64( %a) + ret %res +} + +; ROUNDEVEN + +define @frintn_nxv8f16( %a) { +; CHECK-LABEL: frintn_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintn z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv8f16( %a) + ret %res +} + +define @frintn_nxv4f16( %a) { +; CHECK-LABEL: frintn_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintn z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call 
@llvm.roundeven.nxv4f16( %a) + ret %res +} + +define @frintn_nxv2f16( %a) { +; CHECK-LABEL: frintn_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintn z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv2f16( %a) + ret %res +} + +define @frintn_nxv4f32( %a) { +; CHECK-LABEL: frintn_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintn z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv4f32( %a) + ret %res +} + +define @frintn_nxv2f32( %a) { +; CHECK-LABEL: frintn_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintn z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv2f32( %a) + ret %res +} + +define @frintn_nxv2f64( %a) { +; CHECK-LABEL: frintn_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintn z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv2f64( %a) + ret %res +} + +; FTRUNC + +define @frintz_nxv8f16( %a) { +; CHECK-LABEL: frintz_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv8f16( %a) + ret %res +} + +define @frintz_nxv4f16( %a) { +; CHECK-LABEL: frintz_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv4f16( %a) + ret %res +} + +define @frintz_nxv2f16( %a) { +; CHECK-LABEL: frintz_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv2f16( %a) + ret %res +} + +define @frintz_nxv4f32( %a) { +; CHECK-LABEL: frintz_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintz z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv4f32( %a) + ret %res +} + +define @frintz_nxv2f32( %a) { +; CHECK-LABEL: frintz_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintz z0.s, p0/m, z0.s +; 
CHECK-NEXT: ret + %res = call @llvm.trunc.nxv2f32( %a) + ret %res +} + +define @frintz_nxv2f64( %a) { +; CHECK-LABEL: frintz_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintz z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv2f64( %a) + ret %res +} + +declare @llvm.ceil.nxv8f16( ) +declare @llvm.ceil.nxv4f16( ) +declare @llvm.ceil.nxv2f16( ) +declare @llvm.ceil.nxv4f32() +declare @llvm.ceil.nxv2f32() +declare @llvm.ceil.nxv2f64() + +declare @llvm.floor.nxv8f16( ) +declare @llvm.floor.nxv4f16( ) +declare @llvm.floor.nxv2f16( ) +declare @llvm.floor.nxv4f32() +declare @llvm.floor.nxv2f32() +declare @llvm.floor.nxv2f64() + +declare @llvm.nearbyint.nxv8f16( ) +declare @llvm.nearbyint.nxv4f16( ) +declare @llvm.nearbyint.nxv2f16( ) +declare @llvm.nearbyint.nxv4f32() +declare @llvm.nearbyint.nxv2f32() +declare @llvm.nearbyint.nxv2f64() + +declare @llvm.rint.nxv8f16( ) +declare @llvm.rint.nxv4f16( ) +declare @llvm.rint.nxv2f16( ) +declare @llvm.rint.nxv4f32() +declare @llvm.rint.nxv2f32() +declare @llvm.rint.nxv2f64() + +declare @llvm.round.nxv8f16( ) +declare @llvm.round.nxv4f16( ) +declare @llvm.round.nxv2f16( ) +declare @llvm.round.nxv4f32() +declare @llvm.round.nxv2f32() +declare @llvm.round.nxv2f64() + +declare @llvm.roundeven.nxv8f16( ) +declare @llvm.roundeven.nxv4f16( ) +declare @llvm.roundeven.nxv2f16( ) +declare @llvm.roundeven.nxv4f32() +declare @llvm.roundeven.nxv2f32() +declare @llvm.roundeven.nxv2f64() + +declare @llvm.trunc.nxv8f16( ) +declare @llvm.trunc.nxv4f16( ) +declare @llvm.trunc.nxv2f16( ) +declare @llvm.trunc.nxv4f32() +declare @llvm.trunc.nxv2f32() +declare @llvm.trunc.nxv2f64() diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 2afecdfc826d4..e4aea2847bc4c 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -480,62 +480,6 @@ define void @float_copy(* %P1, * %P2) { ret void } -; FCEIL - -define @frintp_nxv8f16( %a) { -; 
CHECK-LABEL: frintp_nxv8f16: -; CHECK: ptrue p0.h -; CHECK-NEXT: frintp z0.h, p0/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv8f16( %a) - ret %res -} - -define @frintp_nxv4f16( %a) { -; CHECK-LABEL: frintp_nxv4f16: -; CHECK: ptrue p0.s -; CHECK-NEXT: frintp z0.h, p0/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv4f16( %a) - ret %res -} - -define @frintp_nxv2f16( %a) { -; CHECK-LABEL: frintp_nxv2f16: -; CHECK: ptrue p0.d -; CHECK-NEXT: frintp z0.h, p0/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv2f16( %a) - ret %res -} - -define @frintp_nxv4f32( %a) { -; CHECK-LABEL: frintp_nxv4f32: -; CHECK: ptrue p0.s -; CHECK-NEXT: frintp z0.s, p0/m, z0.s -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv4f32( %a) - ret %res -} - -define @frintp_nxv2f32( %a) { -; CHECK-LABEL: frintp_nxv2f32: -; CHECK: ptrue p0.d -; CHECK-NEXT: frintp z0.s, p0/m, z0.s -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv2f32( %a) - ret %res -} - -define @frintp_nxv2f64( %a) { -; CHECK-LABEL: frintp_nxv2f64: -; CHECK: ptrue p0.d -; CHECK-NEXT: frintp z0.d, p0/m, z0.d -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv2f64( %a) - ret %res -} - declare @llvm.aarch64.sve.frecps.x.nxv8f16(, ) declare @llvm.aarch64.sve.frecps.x.nxv4f32( , ) declare @llvm.aarch64.sve.frecps.x.nxv2f64(, ) @@ -551,12 +495,5 @@ declare @llvm.fma.nxv8f16(, @llvm.fma.nxv4f16(, , ) declare @llvm.fma.nxv2f16(, , ) -declare @llvm.ceil.nxv8f16( ) -declare @llvm.ceil.nxv4f16( ) -declare @llvm.ceil.nxv2f16( ) -declare @llvm.ceil.nxv4f32() -declare @llvm.ceil.nxv2f32() -declare @llvm.ceil.nxv2f64() - ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2 diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll index e0d99d57d8e6a..aa01dae055123 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -1,5 +1,9 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning ; EXTRACT VECTOR ELT diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll index 5e6dedf4a4cc1..cb998730d2bca 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -1,5 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning ; INSERT VECTOR ELT diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 3016d99fc8c1c..e6980b895f590 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -463,15 +463,16 @@ define void @ipra_call_with_stack() #0 { ; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory: ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33 -; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]] +; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:4 ; GCN: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN: s_mov_b32 s33, s32 ; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]] +; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:4 ; GCN: s_waitcnt vmcnt(0) ; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]] ; GCN: s_mov_b64 exec, [[COPY_EXEC2]] ; GCN: s_setpc_b64 +; GCN: ScratchSize: 8 define void @callee_need_to_spill_fp_to_memory() #3 { call void asm sideeffect "; clobber nonpreserved SGPRs", "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} @@ -529,8 +530,8 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset ; GCN: s_or_saveexec_b64 s[4:5], -1 ; GCN: v_mov_b32_e32 v0, s33 -; GCN-NOT: v_mov_b32_e32 v0, 0x100c -; GCN-NEXT: v_mov_b32_e32 v1, 0x100c +; GCN-NOT: v_mov_b32_e32 v0, 0x1008 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1008 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval align 4 %arg) #3 { %alloca = alloca i32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index 815251e3560ce..e87f1e7dc8dd0 100644 --- 
a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -16,16 +16,13 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) - ; GCN: DBG_VALUE ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc ; GCN: DBG_VALUE @@ -68,14 +65,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -118,14 +113,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef 
%3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -387,22 +380,19 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.5(0x80000000) - ; GCN: S_BRANCH %bb.5 - ; GCN: bb.4: - ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc - ; GCN: S_ENDPGM 0 ; GCN: bb.5: ; GCN: successors: %bb.4(0x80000000) ; GCN: S_BRANCH %bb.4 + ; GCN: bb.4: + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -484,21 +474,20 @@ body: | ; GCN: S_BRANCH %bb.2 ; GCN: bb.2: ; GCN: successors: %bb.3(0x40000000), %bb.6(0x40000000) - ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_XOR_B64_]] - ; GCN: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[COPY1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[S_XOR_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GCN: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec ; GCN: bb.3: ; GCN: successors: %bb.3(0x40000000), %bb.4(0x40000000) - ; GCN: 
[[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], undef %4:sreg_64, implicit-def dead $scc + ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %4:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) ; GCN: bb.5: ; GCN: successors: %bb.6(0x80000000) - ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc ; GCN: bb.6: ; GCN: $exec = S_OR_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc ; GCN: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 9d66f849391d1..6da332a596fb0 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -198,23 +198,23 @@ end: ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0 ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 1 -; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} +; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} ; Regular spill value restored after exec modification ; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; Spill saved exec -; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] -; VGPR: v_writelane_b32 [[SPILL_VGPR]], 
s[[FLOW_S_RELOAD_SAVEEXEC_HI]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] +; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] +; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] -; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], 0 -; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], 1 +; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]], 0 +; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC]], s[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]], 1 ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC]], off, s[0:3], 0 offset:[[FLOW_SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], 0 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} +; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]]{{\]}} ; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir index 08e6f1a067ac5..faea2df6b517b 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -100,13 +100,12 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %2 - ; CHECK: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_SAVEEXEC_B64 [[COPY]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 - ; CHECK: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY1]], implicit $exec + ; CHECK: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY2]], implicit $exec + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK: S_BRANCH %bb.2 ; CHECK: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index ca125c7897793..31531a43fc3f2 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -164,8 +164,8 @@ ; GCN-O1-NEXT: Delete dead loops ; GCN-O1-NEXT: Unroll loops ; GCN-O1-NEXT: SROA -; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Phi Values Analysis +; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Memory Dependence Analysis ; GCN-O1-NEXT: MemCpy Optimization ; GCN-O1-NEXT: Sparse Conditional Constant Propagation @@ -493,9 +493,9 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Optimization Remark Emitter ; GCN-O2-NEXT: Global Value Numbering +; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Memory Dependence Analysis ; GCN-O2-NEXT: MemCpy Optimization ; GCN-O2-NEXT: Sparse Conditional Constant Propagation @@ -853,9 +853,9 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global 
Value Numbering +; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: MemCpy Optimization ; GCN-O3-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index e8e3518aed1c2..3219c75c43a82 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -291,12 +291,12 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset ; GCN: s_or_saveexec_b64 s[4:5], -1 ; GCN: v_mov_b32_e32 v0, s33 -; GCN-NOT: v_mov_b32_e32 v0, 0x1088 -; GCN-NEXT: v_mov_b32_e32 v1, 0x1088 +; GCN-NOT: v_mov_b32_e32 v0, 0x1084 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1084 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; GCN: v_mov_b32_e32 v0, s34 -; GCN-NOT: v_mov_b32_e32 v0, 0x1090 -; GCN-NEXT: v_mov_b32_e32 v1, 0x1090 +; GCN-NOT: v_mov_b32_e32 v0, 0x1088 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1088 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 diff --git a/llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll b/llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll new file mode 100644 index 0000000000000..74f675876834a --- /dev/null +++ b/llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=armv7-arm-none-eabi -o - %s | FileCheck %s + +%struct.sMyType = type { i32 } + +@val = hidden constant %struct.sMyType zeroinitializer, align 4 +@v = internal global %struct.sMyType zeroinitializer, align 4 + +define hidden void @InitVal() local_unnamed_addr { +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct.sMyType* @v to i8*), i8* align 4 
bitcast (%struct.sMyType* @val to i8*), i32 4, i1 true) +; The last argument is the isvolatile argument. This is a volatile memcpy. +; Test that the memcpy expansion does not optimize away the load. +; CHECK: ldr +; CHECK: str + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) diff --git a/llvm/test/CodeGen/ARM/pr45824.ll b/llvm/test/CodeGen/ARM/pr45824.ll index dda5bc656fcf7..221c764526b44 100644 --- a/llvm/test/CodeGen/ARM/pr45824.ll +++ b/llvm/test/CodeGen/ARM/pr45824.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=armv7-none-linux-eabi < %s | FileCheck %s -define void @vld1x2([8 x i32] %0) { +define void @vld1x2(i8* %v4, i32 %v2) { ; CHECK-LABEL: vld1x2: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, #0 @@ -13,26 +13,21 @@ define void @vld1x2([8 x i32] %0) { ; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0] ; CHECK-NEXT: b .LBB0_1 - %2 = extractvalue [8 x i32] %0, 5 - br label %3 - -3: ; preds = %.loopexit, %1 - %4 = getelementptr inbounds i8, i8* undef, i32 undef br label %.preheader .preheader: ; preds = %.preheader, %3 - %5 = icmp eq i8* %4, undef - br i1 %5, label %.loopexit, label %.preheader + %v5 = icmp eq i8* %v4, undef + br i1 %v5, label %.loopexit, label %.preheader .loopexit: ; preds = %.preheader - %6 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %4) - %7 = getelementptr inbounds i8, i8* %4, i32 %2 - %8 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %7) + %v6 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %v4) + %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2 + %v8 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %v7) tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1) - br label %3 + br label %.preheader } 
-define void @vld1x3([8 x i32] %0) { +define void @vld1x3(i8* %v4, i32 %v2) { ; CHECK-LABEL: vld1x3: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, #0 @@ -44,26 +39,21 @@ define void @vld1x3([8 x i32] %0) { ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0] ; CHECK-NEXT: b .LBB1_1 - %2 = extractvalue [8 x i32] %0, 5 - br label %3 - -3: ; preds = %.loopexit, %1 - %4 = getelementptr inbounds i8, i8* undef, i32 undef br label %.preheader .preheader: ; preds = %.preheader, %3 - %5 = icmp eq i8* %4, undef - br i1 %5, label %.loopexit, label %.preheader + %v5 = icmp eq i8* %v4, undef + br i1 %v5, label %.loopexit, label %.preheader .loopexit: ; preds = %.preheader - %6 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %4) - %7 = getelementptr inbounds i8, i8* %4, i32 %2 - %8 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %7) + %v6 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %v4) + %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2 + %v8 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %v7) tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1) - br label %3 + br label %.preheader } -define void @vld1x4([8 x i32] %0) { +define void @vld1x4(i8* %v4, i32 %v2) { ; CHECK-LABEL: vld1x4: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, #0 @@ -75,23 +65,18 @@ define void @vld1x4([8 x i32] %0) { ; CHECK-NEXT: @ in Loop: Header=BB2_1 Depth=1 ; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0] ; CHECK-NEXT: b .LBB2_1 - %2 = extractvalue [8 x i32] %0, 5 - br label %3 - -3: ; preds = %.loopexit, %1 - %4 = getelementptr inbounds i8, i8* undef, i32 undef br label %.preheader .preheader: ; preds = %.preheader, %3 - %5 = icmp eq i8* %4, undef - br i1 %5, label %.loopexit, label %.preheader + %v5 = icmp eq i8* %v4, undef + br i1 %v5, label %.loopexit, label %.preheader .loopexit: ; preds = 
%.preheader - %6 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %4) - %7 = getelementptr inbounds i8, i8* %4, i32 %2 - %8 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %7) + %v6 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %v4) + %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2 + %v8 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %v7) tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1) - br label %3 + br label %.preheader } declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll index cc10c378f27b6..36157c66887cc 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that this compiles successfully. -; CHECK: vpacke +; CHECK: vdeal target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" target triple = "hexagon" @@ -17,4 +17,4 @@ b0: ret void } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll new file mode 100644 index 0000000000000..bed13b1dbcc98 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check for successful compilation. 
+; CHECK-LABEL: f0: +; CHECK: dealloc_return + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dso_local void @f0(i16* %a0) local_unnamed_addr #0 { +b0: + %v0 = getelementptr i16, i16* %a0, i32 8 + %v1 = getelementptr i16, i16* %v0, i32 0 + %v2 = icmp eq i32 0, 0 + %v3 = insertelement <8 x i1> undef, i1 %v2, i64 0 + %v4 = shufflevector <8 x i1> %v3, <8 x i1> undef, <8 x i32> zeroinitializer + %v5 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull undef, i32 4, <8 x i1> %v4, <8 x i32> undef) + %v6 = sub nsw <8 x i32> zeroinitializer, %v5 + %v7 = add nsw <8 x i32> %v6, zeroinitializer + %v8 = add <8 x i32> zeroinitializer, %v7 + %v9 = lshr <8 x i32> %v8, + %v10 = trunc <8 x i32> %v9 to <8 x i16> + %v11 = bitcast i16* %v1 to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v10, <8 x i16>* %v11, i32 2, <8 x i1> %v4) + ret void +} + +; Function Attrs: argmemonly nounwind readonly willreturn +declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32 immarg, <8 x i1>, <8 x i32>) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #2 + +attributes #0 = { "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls,-packets" } +attributes #1 = { argmemonly nounwind readonly willreturn } +attributes #2 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/short-store-widen.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-store.ll similarity index 100% rename from llvm/test/CodeGen/Hexagon/autohvx/short-store-widen.ll rename to llvm/test/CodeGen/Hexagon/autohvx/isel-widen-store.ll diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll new file mode 100644 index 
0000000000000..404d3d1ff2606 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-hvx-widen=16 < %s | FileCheck %s + +; Check for successful compilation. +; CHECK-LABEL: f0: +; CHECK: vmemu + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dso_local void @f0(i16* %a0) local_unnamed_addr #0 { +b0: + %v0 = getelementptr i16, i16* %a0, i32 8 + %v1 = getelementptr i16, i16* %v0, i32 0 + %v2 = icmp eq i32 0, 0 + %v3 = insertelement <8 x i1> undef, i1 %v2, i64 0 + %v4 = shufflevector <8 x i1> %v3, <8 x i1> undef, <8 x i32> zeroinitializer + %v5 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull undef, i32 4, <8 x i1> %v4, <8 x i32> undef) + %v6 = sub nsw <8 x i32> zeroinitializer, %v5 + %v7 = add nsw <8 x i32> %v6, zeroinitializer + %v8 = add <8 x i32> zeroinitializer, %v7 + %v9 = lshr <8 x i32> %v8, + %v10 = trunc <8 x i32> %v9 to <8 x i16> + %v11 = bitcast i16* %v1 to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v10, <8 x i16>* %v11, i32 2, <8 x i1> %v4) + ret void +} + +; Function Attrs: argmemonly nounwind readonly willreturn +declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32 immarg, <8 x i1>, <8 x i32>) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #2 + +attributes #0 = { "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls,-packets" } +attributes #1 = { argmemonly nounwind readonly willreturn } +attributes #2 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll new file mode 100644 index 
0000000000000..6d5018757c7a6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; Check for successful compilation. +; Expect that the truncate to v32i8 is lowered to vdeale. + +; CHECK-LABEL: f0: +; CHECK: vdeale + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @f0(<32 x i32> %a0) local_unnamed_addr #0 { +b0: + %v0 = trunc <32 x i32> %a0 to <32 x i8> + %v1 = shufflevector <32 x i8> %v0, <32 x i8> undef, <128 x i32> + tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v1, <128 x i8>* undef, i32 128, <128 x i1> ) + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32 immarg, <128 x i1>) #1 + +attributes #0 = { "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" } +attributes #1 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir index bc5c06d7f674b..e84ed9c368906 100644 --- a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir @@ -1,5 +1,6 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -o - %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -verify-machineinstrs -o - %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s --- | diff --git 
a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir index 7ece521bedbfd..78ed554687fa2 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir @@ -1,4 +1,5 @@ # RUN: llc -o - -run-pass mir-canonicalizer -verify-machineinstrs %s | FileCheck %s +# RUN: llc -o - -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs %s | FileCheck %s --- | target triple = "aarch64-unknown-unknown" define void @f() { unreachable } diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir index dbb6b62b68bb7..6d3124c61db1b 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass mir-canonicalizer -verify-machineinstrs -mtriple aarch64-unknown-linux-gnu -o - %s | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -mtriple aarch64-unknown-linux-gnu -o - %s | FileCheck %s ... --- name: foo diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir index e95b838030c74..4b03f42083dd7 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -mir-vreg-namer-use-stable-hash -run-pass mir-canonicalizer %s | FileCheck %s ... 
--- diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir index 74eae56f9cb66..924a34d5ce43d 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -mir-vreg-namer-use-stable-hash -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s # These Idempotent instructions are sorted alphabetically (based on after the '=') # CHECK: %bb0_{{[0-9]+}}__1:gpr64 = MOVi64imm 4617315517961601024 # CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 408 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir index 5d63f4107b921..ef4939e47136a 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir @@ -1,4 +1,4 @@ -# RUN: llc -x mir -mtriple aarch64-apple-ios -run-pass mir-namer -verify-machineinstrs -o - < %s | FileCheck %s +# RUN: llc -x mir -mtriple aarch64-apple-ios -run-pass mir-namer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - < %s | FileCheck %s --- name: foo diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir index 295ab75b6d825..6bde24eb33192 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir @@ -1,4 +1,5 @@ -# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer %s | FileCheck %s +# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer -verify-machineinstrs %s | FileCheck %s +# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs %s | FileCheck %s # This tests for the itereator invalidation fix (reviews.llvm.org/D62713) ... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir index ea2f7de26875b..31f7d7b1b6a40 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir @@ -1,4 +1,5 @@ -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass mir-canonicalizer -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass mir-canonicalizer -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s | FileCheck %s --- | target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" diff --git a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir index 8bed8fe6af167..80230a59928db 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir @@ -1,5 +1,6 @@ # RUN: llc -march=amdgcn -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -run-pass mir-canonicalizer -verify-machineinstrs -o - %s +# RUN: llc -march=amdgcn -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s # Previously getReservedRegs was called before parsing # machineFunctionInfo, but the AMDGPU implementation depends on diff --git a/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir b/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir index 2202c74d643dd..91301af0f314b 100644 --- a/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir +++ b/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir @@ -1,4 +1,5 @@ -# RUN: llc -run-pass mir-canonicalizer -o - %s | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -verify-machineinstrs -o - %s | FileCheck %s 
+# RUN: llc -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s | FileCheck %s --- name: cimm_fpimm_hash_test body: | diff --git a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir index 94c69f1be36a6..a0a2f9e378efa 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir @@ -1,5 +1,7 @@ # RUN: llc -run-pass mir-namer -x mir -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -run-pass mir-canonicalizer -x mir -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -run-pass mir-namer -mir-vreg-namer-use-stable-hash -x mir -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -x mir -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir b/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir index 68158563a6de7..23d5c2e7b60a2 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple x86_64-linux-gnu -run-pass mir-canonicalizer -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple x86_64-linux-gnu -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs %s -o - | FileCheck %s ... 
--- diff --git a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir index d3c797ba8df57..bc5991ea41b5f 100644 --- a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir +++ b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir @@ -1,4 +1,5 @@ -# RUN: llc -march=x86-64 -run-pass mir-canonicalizer -o - %s | FileCheck %s +# RUN: llc -march=x86-64 -run-pass mir-canonicalizer -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=x86-64 -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s | FileCheck %s # The purpose of this test is to ensure that differing flags do in-fact cause # naming collisions with the new vreg renamers naming scheme. --- | diff --git a/llvm/test/CodeGen/PowerPC/constants-i64.ll b/llvm/test/CodeGen/PowerPC/constants-i64.ll index fa45dd19c1654..956845f5a5b35 100644 --- a/llvm/test/CodeGen/PowerPC/constants-i64.ll +++ b/llvm/test/CodeGen/PowerPC/constants-i64.ll @@ -80,5 +80,48 @@ entry: ; CHECK: blr } -attributes #0 = { nounwind readnone } +define i64 @cn32_1() #0 { +entry: + ret i64 3900000000 + +; CHECK-LABEL: @cn32_1 +; CHECK: lis [[REG1:[0-9]+]], 232 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 +; CHECK: sldi 3, [[REG1]], 8 +; CHECK: blr +} +define i32 @cn32_1_i32() #0 { +entry: + ret i32 -394967296 + +; CHECK-LABEL: @cn32_1_i32 +; CHECK: lis [[REG1:[0-9]+]], 232 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 +; CHECK: sldi 3, [[REG1]], 8 +; CHECK: blr +} + +define i64 @cn32_2() #0 { +entry: + ret i64 4294967295 + +; CHECK-LABEL: @cn32_2 +; CHECK: li [[REG1:[0-9]+]], 0 +; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: blr +} + +define i32 @cn32_2_i32() #0 { +entry: + ret i32 -1 + +; CHECK-LABEL: @cn32_2_i32 +; CHECK: li [[REG1:[0-9]+]], 0 +; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: blr +} + +attributes #0 = { nounwind readnone } diff --git 
a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll index 6683d925a1b16..bf2abe0b6b837 100644 --- a/llvm/test/CodeGen/PowerPC/fma-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll @@ -239,4 +239,26 @@ define double @getNegatedExpression_crash(double %x, double %y) { %fma1 = call reassoc nsz double @llvm.fma.f64(double %fma, double %y, double %add) ret double %fma1 } + +define double @fma_flag_propagation(double %a) { +; CHECK-FAST-LABEL: fma_flag_propagation: +; CHECK-FAST: # %bb.0: # %entry +; CHECK-FAST-NEXT: xssubdp 1, 1, 1 +; CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_flag_propagation: +; CHECK-FAST-NOVSX: # %bb.0: # %entry +; CHECK-FAST-NOVSX-NEXT: fsub 1, 1, 1 +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_flag_propagation: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xssubdp 1, 1, 1 +; CHECK-NEXT: blr +entry: + %0 = fneg double %a + %1 = call reassoc nnan double @llvm.fma.f64(double %0, double 1.0, double %a) + ret double %1 +} + declare double @llvm.fma.f64(double, double, double) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll index d8ef98c149f6a..b4927f3da0637 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -403,47 +403,39 @@ entry: define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { ; P8-LABEL: ppcq_to_i32: ; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __gcc_qtou -; P8-NEXT: nop +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 ; P8-NEXT: extsw r3, r3 -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: ppcq_to_i32: ; P9: # %bb.0: # %entry 
-; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __gcc_qtou -; P9-NEXT: nop +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 ; P9-NEXT: extsw r3, r3 -; P9-NEXT: addi r1, r1, 32 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_i32: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 -; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __gcc_qtou -; NOVSX-NEXT: nop -; NOVSX-NEXT: extsw r3, r3 -; NOVSX-NEXT: addi r1, r1, 32 -; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) ; NOVSX-NEXT: blr entry: %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 @@ -549,12 +541,40 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { ; P8: # %bb.0: # %entry ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: .cfi_def_cfa_offset 128 ; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __fixunstfsi +; P8-NEXT: .cfi_offset r30, -16 +; P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P8-NEXT: xxlxor f3, f3, f3 +; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; P8-NEXT: fcmpo cr0, f2, f3 +; P8-NEXT: lis r3, -32768 +; P8-NEXT: xxlxor f3, f3, f3 +; P8-NEXT: fcmpo cr1, f1, f0 +; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt +; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq +; P8-NEXT: cror 
4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P8-NEXT: isel r30, 0, r3, 4*cr5+lt +; P8-NEXT: bc 12, 4*cr5+lt, .LBB11_2 +; P8-NEXT: # %bb.1: # %entry +; P8-NEXT: fmr f3, f0 +; P8-NEXT: .LBB11_2: # %entry +; P8-NEXT: xxlxor f4, f4, f4 +; P8-NEXT: bl __gcc_qsub ; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 +; P8-NEXT: xor r3, r3, r30 +; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: clrldi r3, r3, 32 +; P8-NEXT: addi r1, r1, 128 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -562,28 +582,88 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { ; P9-LABEL: ppcq_to_u32: ; P9: # %bb.0: # %entry ; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_def_cfa_offset 48 ; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __fixunstfsi +; P9-NEXT: .cfi_offset r30, -16 +; P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -48(r1) +; P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P9-NEXT: xxlxor f3, f3, f3 +; P9-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; P9-NEXT: fcmpo cr1, f2, f3 +; P9-NEXT: lis r3, -32768 +; P9-NEXT: fcmpo cr0, f1, f0 +; P9-NEXT: xxlxor f3, f3, f3 +; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt +; P9-NEXT: crandc 4*cr5+gt, lt, eq +; P9-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P9-NEXT: isel r30, 0, r3, 4*cr5+lt +; P9-NEXT: bc 12, 4*cr5+lt, .LBB11_2 +; P9-NEXT: # %bb.1: # %entry +; P9-NEXT: fmr f3, f0 +; P9-NEXT: .LBB11_2: # %entry +; P9-NEXT: xxlxor f4, f4, f4 +; P9-NEXT: bl __gcc_qsub ; P9-NEXT: nop -; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 +; P9-NEXT: xor r3, r3, r30 +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: addi r1, r1, 48 ; 
P9-NEXT: ld r0, 16(r1) +; P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_u32: ; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mfocrf r12, 32 ; NOVSX-NEXT: mflr r0 ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: stw r12, 8(r1) +; NOVSX-NEXT: stdu r1, -48(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 48 ; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: .cfi_offset cr2, 8 +; NOVSX-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; NOVSX-NEXT: addis r4, r2, .LCPI11_1@toc@ha +; NOVSX-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; NOVSX-NEXT: lfs f4, .LCPI11_1@toc@l(r4) +; NOVSX-NEXT: fcmpo cr0, f1, f0 +; NOVSX-NEXT: fcmpo cr1, f2, f4 +; NOVSX-NEXT: fmr f3, f4 +; NOVSX-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt +; NOVSX-NEXT: crandc 4*cr5+gt, lt, eq +; NOVSX-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt +; NOVSX-NEXT: bc 12, 4*cr2+lt, .LBB11_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr f3, f0 +; NOVSX-NEXT: .LBB11_2: # %entry +; NOVSX-NEXT: bl __gcc_qsub ; NOVSX-NEXT: nop -; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, 44 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lis r3, -32768 +; NOVSX-NEXT: lwz r4, 44(r1) +; NOVSX-NEXT: isel r3, 0, r3, 4*cr2+lt +; NOVSX-NEXT: xor r3, r4, r3 +; NOVSX-NEXT: clrldi r3, r3, 32 +; NOVSX-NEXT: addi r1, r1, 48 ; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: lwz r12, 8(r1) +; NOVSX-NEXT: mtocrf 32, r12 ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr entry: @@ -747,12 +827,17 @@ entry: ret fp128 %conv } -define void @fptoint_nofpexcept(fp128 %m, i32* %addr1, i64* %addr2) { +define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2) { ; MIR-LABEL: name: fptoint_nofpexcept ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSWZ ; MIR: renamable 
$v{{[0-9]+}} = nofpexcept XSCVQPUWZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSDZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUDZ +; +; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD +; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS +; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD +; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS entry: %conv1 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 store volatile i32 %conv1, i32* %addr1, align 4 @@ -762,6 +847,11 @@ entry: store volatile i64 %conv3, i64* %addr2, align 8 %conv4 = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 store volatile i64 %conv4, i64* %addr2, align 8 + + %conv5 = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0 + store volatile i32 %conv5, i32* %addr1, align 4 + %conv6 = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0 + store volatile i32 %conv6, i32* %addr1, align 4 ret void } diff --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir index 7c14e7750df90..2f7a85a111ebb 100644 --- a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir +++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir @@ -51,4 +51,4 @@ body: | # # CHECK-PASS-NOT: %2:g8rc = RLDICL killed %1, 0, 32 # CHECK-PASS-NOT: %3:g8rc = RLDICR %2, 2, 61 -# CHECK-PASS: %3:g8rc = RLDIC %1, 2, 30 +# CHECK-PASS: %3:g8rc = RLDIC killed %1, 2, 30 diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole.mir b/llvm/test/CodeGen/PowerPC/mi-peephole.mir index 8bf72461d5453..c7f41cd0bc4c9 100644 --- a/llvm/test/CodeGen/PowerPC/mi-peephole.mir +++ b/llvm/test/CodeGen/PowerPC/mi-peephole.mir @@ -31,7 +31,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: %1:g8rc = COPY $x4 ; CHECK: %0:g8rc = COPY $x3 - ; 
CHECK: %3:g8rc = RLDIC %1, 2, 30 + ; CHECK: %3:g8rc = RLDIC killed %1, 2, 30 ; CHECK: $x3 = COPY %3 ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 ... diff --git a/llvm/test/CodeGen/PowerPC/mulli.ll b/llvm/test/CodeGen/PowerPC/mulli.ll new file mode 100644 index 0000000000000..3e417f9720a84 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mulli.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +define i64 @test1(i64 %x) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: li 4, 625 +; CHECK-NEXT: sldi 4, 4, 36 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, 42949672960000 + ret i64 %y +} + +define i64 @test2(i64 %x) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: li 4, -625 +; CHECK-NEXT: sldi 4, 4, 36 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, -42949672960000 + ret i64 %y +} + +define i64 @test3(i64 %x) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 74 +; CHECK-NEXT: ori 4, 4, 16384 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, 4866048 + ret i64 %y +} + +define i64 @test4(i64 %x) { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -75 +; CHECK-NEXT: ori 4, 4, 49152 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, -4866048 + ret i64 %y +} + +define i64 @test5(i64 %x) { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 16 +; CHECK-NEXT: ori 4, 4, 1 +; CHECK-NEXT: sldi 4, 4, 12 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, 4294971392 + ret i64 %y +} + +define i64 @test6(i64 %x) { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -17 +; CHECK-NEXT: ori 4, 4, 65535 +; CHECK-NEXT: sldi 4, 4, 12 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, -4294971392 + ret i64 %y +} diff --git 
a/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll b/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll index 9f5ae661b5d76..82836f42e1c19 100644 --- a/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll +++ b/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse-memssa -loop-rotate -licm -loop-rotate -S %s -o - | FileCheck %s +; RUN: opt -early-cse-memssa -earlycse-debug-hash -loop-rotate -licm -loop-rotate -S %s -o - | FileCheck %s ; ModuleID = 'bugpoint-reduced-simplified.bc' source_filename = "bugpoint-output-8903f29.bc" target datalayout = "e-m:e-i64:64-n32:64" diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll index 2b1cf27c20ec9..637361f7b1c96 100644 --- a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll @@ -64,3 +64,59 @@ entry: %ext = tail call i32 @llvm.ppc.altivec.vextractqm(<1 x i128> %a) ret i32 %ext } + +declare <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8>) +declare <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16>) +declare <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32>) +declare <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64>) +declare <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128>) + +define <16 x i8> @test_vexpandbm(<16 x i8> %a) { +; CHECK-LABEL: test_vexpandbm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandbm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8> %a) + ret <16 x i8> %exp +} + +define <8 x i16> @test_vexpandhm(<8 x i16> %a) { +; CHECK-LABEL: test_vexpandhm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandhm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16> %a) + ret <8 x i16> %exp +} + +define <4 x i32> @test_vexpandwm(<4 x i32> %a) { +; CHECK-LABEL: test_vexpandwm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandwm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <4 x i32> 
@llvm.ppc.altivec.vexpandwm(<4 x i32> %a) + ret <4 x i32> %exp +} + +define <2 x i64> @test_vexpanddm(<2 x i64> %a) { +; CHECK-LABEL: test_vexpanddm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpanddm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64> %a) + ret <2 x i64> %exp +} + +define <1 x i128> @test_vexpandqm(<1 x i128> %a) { +; CHECK-LABEL: test_vexpandqm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandqm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128> %a) + ret <1 x i128> %exp +} diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll index fd58654d0ae1e..722a4de860c74 100644 --- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll @@ -10,6 +10,7 @@ ; This includes the low order and high order versions of vector multiply. ; The low order version operates on doublewords, whereas the high order version ; operates on signed and unsigned words and doublewords. +; This file also includes 128 bit vector multiply instructions. 
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vmulld: @@ -122,3 +123,54 @@ entry: %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %mulh } + +declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone + +define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmuleud: +; CHECK: # %bb.0: +; CHECK-NEXT: vmuleud v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmuloud: +; CHECK: # %bb.0: +; CHECK-NEXT: vmuloud v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmulesd: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulesd v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmulosd: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulosd v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone { +; CHECK-LABEL: test_vmsumcud: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vmsumcud v2, v2, v3, v4 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) + ret <1 x i128> %tmp +} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll b/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll index d58e8c21509a6..dfbcbb3b338be 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll @@ -3,8 +3,9 @@ ; RUN: -enable-ppc-pcrel-tls < %s | FileCheck %s --check-prefix=CHECK-S ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: --relocation-model=pic -mcpu=pwr10 -ppc-asm-full-reg-names \ -; RUN: -enable-ppc-pcrel-tls --filetype=obj < %s | \ -; RUN: llvm-objdump --mcpu=pwr10 -dr - | FileCheck %s --check-prefix=CHECK-O +; RUN: -enable-ppc-pcrel-tls --filetype=obj -o %t.o < %s +; RUN: llvm-objdump --mcpu=pwr10 -dr %t.o |FileCheck %s --check-prefix=CHECK-O +; RUN: llvm-readelf -s %t.o | FileCheck %s --check-prefix=CHECK-SYM ; These test cases are to ensure that when using pc relative memory operations ; ABI correct code and relocations are produced for General Dynamic TLS Model. 
@@ -45,6 +46,9 @@ define i32 @GeneralDynamicValueLoad() { ; CHECK-O-NEXT: 0000000000000054: R_PPC64_TLSGD x ; CHECK-O-NEXT: 0000000000000054: R_PPC64_REL24_NOTOC __tls_get_addr ; CHECK-O-NEXT: 58: 00 00 63 80 lwz 3, 0(3) + + ; CHECK-SYM-LABEL: Symbol table '.symtab' contains 7 entries + ; CHECK-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT UND x entry: %0 = load i32, i32* @x, align 4 ret i32 %0 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll b/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll index 7789e23515ab4..f10ed538d092c 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll @@ -3,8 +3,9 @@ ; RUN: FileCheck %s --check-prefix=CHECK-S ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names --filetype=obj \ -; RUN: -enable-ppc-pcrel-tls < %s | llvm-objdump --mcpu=pwr10 -dr - | \ -; RUN: FileCheck %s --check-prefix=CHECK-O +; RUN: -enable-ppc-pcrel-tls -o %t.o < %s +; RUN: llvm-objdump --mcpu=pwr10 -dr %t.o | FileCheck %s --check-prefix=CHECK-O +; RUN: llvm-readelf -s %t.o | FileCheck %s --check-prefix=CHECK-SYM ; These test cases are to ensure that when using pc relative memory operations ; ABI correct code and relocations are produced for Initial Exec TLS Model. 
@@ -42,6 +43,9 @@ define i32 @InitialExecValueLoad() { ; CHECK-O-NEXT: 2e 68 63 7c lwzx 3, 3, 13 ; CHECK-O-NEXT: 0000000000000029: R_PPC64_TLS x ; CHECK-O-NEXT: 20 00 80 4e blr + +; CHECK-SYM-LABEL: Symbol table '.symtab' contains 6 entries +; CHECK-SYM: 5: 0000000000000000 0 TLS GLOBAL DEFAULT UND x entry: %0 = load i32, i32* @x, align 4 ret i32 %0 diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 72c6a137b9afa..5ab12093954f8 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1202,38 +1202,36 @@ entry: define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __gcc_qtou -; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __gcc_qtou -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: blr ; ; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64: # %bb.0: # %entry -; PC64-NEXT: mflr 0 -; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __gcc_qtou -; PC64-NEXT: nop -; PC64-NEXT: 
addi 1, 1, 112 -; PC64-NEXT: ld 0, 16(1) -; PC64-NEXT: mtlr 0 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, -8(1) +; PC64-NEXT: lwz 3, -4(1) ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128( @@ -1289,24 +1287,76 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __fixunstfsi +; PC64LE-NEXT: stdu 1, -48(1) +; PC64LE-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE-NEXT: xxlxor 3, 3, 3 +; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64LE-NEXT: fcmpo 0, 2, 3 +; PC64LE-NEXT: lis 3, -32768 +; PC64LE-NEXT: xxlxor 3, 3, 3 +; PC64LE-NEXT: fcmpo 1, 1, 0 +; PC64LE-NEXT: crand 20, 6, 0 +; PC64LE-NEXT: crandc 21, 4, 6 +; PC64LE-NEXT: cror 20, 21, 20 +; PC64LE-NEXT: isel 30, 0, 3, 20 +; PC64LE-NEXT: bc 12, 20, .LBB31_2 +; PC64LE-NEXT: # %bb.1: # %entry +; PC64LE-NEXT: fmr 3, 0 +; PC64LE-NEXT: .LBB31_2: # %entry +; PC64LE-NEXT: xxlxor 4, 4, 4 +; PC64LE-NEXT: bl __gcc_qsub ; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 +; PC64LE-NEXT: xor 3, 3, 30 +; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 30, -16(1) # 8-byte Folded Spill ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __fixunstfsi +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: 
addis 3, 2, .LCPI31_0@toc@ha +; PC64LE9-NEXT: xxlxor 3, 3, 3 +; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64LE9-NEXT: fcmpo 1, 2, 3 +; PC64LE9-NEXT: lis 3, -32768 +; PC64LE9-NEXT: fcmpo 0, 1, 0 +; PC64LE9-NEXT: xxlxor 3, 3, 3 +; PC64LE9-NEXT: crand 20, 2, 4 +; PC64LE9-NEXT: crandc 21, 0, 2 +; PC64LE9-NEXT: cror 20, 21, 20 +; PC64LE9-NEXT: isel 30, 0, 3, 20 +; PC64LE9-NEXT: bc 12, 20, .LBB31_2 +; PC64LE9-NEXT: # %bb.1: # %entry +; PC64LE9-NEXT: fmr 3, 0 +; PC64LE9-NEXT: .LBB31_2: # %entry +; PC64LE9-NEXT: xxlxor 4, 4, 4 +; PC64LE9-NEXT: bl __gcc_qsub ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 +; PC64LE9-NEXT: xor 3, 3, 30 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr ; @@ -1314,12 +1364,45 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64: # %bb.0: # %entry ; PC64-NEXT: mflr 0 ; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __fixunstfsi +; PC64-NEXT: mfcr 12 +; PC64-NEXT: stw 12, 8(1) +; PC64-NEXT: stdu 1, -128(1) +; PC64-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64-NEXT: addis 3, 2, .LCPI31_1@toc@ha +; PC64-NEXT: lfs 4, .LCPI31_1@toc@l(3) +; PC64-NEXT: fcmpo 0, 1, 0 +; PC64-NEXT: crandc 21, 0, 2 +; PC64-NEXT: fcmpo 1, 2, 4 +; PC64-NEXT: crand 20, 2, 4 +; PC64-NEXT: cror 8, 21, 20 +; PC64-NEXT: fmr 3, 4 +; PC64-NEXT: bc 12, 8, .LBB31_2 +; PC64-NEXT: # %bb.1: # %entry +; PC64-NEXT: fmr 3, 0 +; PC64-NEXT: .LBB31_2: # %entry +; PC64-NEXT: bl __gcc_qsub ; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: lis 4, -32768 +; PC64-NEXT: bc 12, 8, .LBB31_3 +; PC64-NEXT: b .LBB31_4 +; PC64-NEXT: .LBB31_3: # %entry +; 
PC64-NEXT: li 4, 0 +; PC64-NEXT: .LBB31_4: # %entry +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, 120(1) +; PC64-NEXT: lwz 3, 124(1) +; PC64-NEXT: xor 3, 3, 4 +; PC64-NEXT: addi 1, 1, 128 ; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: lwz 12, 8(1) ; PC64-NEXT: mtlr 0 +; PC64-NEXT: mtcrf 32, 12 # cr2 ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128( diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll new file mode 100644 index 0000000000000..d09a5fe8fb0b6 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \ +; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s +@a = local_unnamed_addr global float* null, align 8 + +; Function Attrs: nounwind +define void @d() local_unnamed_addr #0 { +; CHECK-LABEL: d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -208(r1) +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r29, 0(r3) +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: cmpwi r30, 1 +; CHECK-NEXT: blt cr0, .LBB0_9 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: cmplwi r30, 4 +; CHECK-NEXT: clrldi r4, r30, 32 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: blt cr0, .LBB0_7 +; CHECK-NEXT: # %bb.2: # %vector.memcheck +; CHECK-NEXT: rldic r6, r30, 2, 30 +; CHECK-NEXT: add r7, r3, r6 +; CHECK-NEXT: cmpld r29, r7 +; CHECK-NEXT: add r6, r29, r6 +; CHECK-NEXT: bc 4, lt, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %vector.memcheck +; CHECK-NEXT: cmpld r3, r6 +; CHECK-NEXT: bc 12, 
lt, .LBB0_7 +; CHECK-NEXT: .LBB0_4: # %vector.ph +; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29 +; CHECK-NEXT: li r7, 15 +; CHECK-NEXT: addi r6, r5, -4 +; CHECK-NEXT: addi r8, r1, 144 +; CHECK-NEXT: rldicl r6, r6, 62, 2 +; CHECK-NEXT: addi r9, r1, 128 +; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: addi r10, r1, 160 +; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: addi r11, r1, 112 +; CHECK-NEXT: .LBB0_5: # %vector.body +; CHECK-NEXT: # +; CHECK-NEXT: add r12, r3, r6 +; CHECK-NEXT: lvx v3, r3, r6 +; CHECK-NEXT: lvx v5, r12, r7 +; CHECK-NEXT: add r12, r29, r6 +; CHECK-NEXT: lvsl v2, r3, r6 +; CHECK-NEXT: vperm v2, v3, v5, v2 +; CHECK-NEXT: lvx v3, r29, r6 +; CHECK-NEXT: lvx v5, r12, r7 +; CHECK-NEXT: lvsl v4, r29, r6 +; CHECK-NEXT: stvx v2, 0, r8 +; CHECK-NEXT: vperm v2, v3, v5, v4 +; CHECK-NEXT: stvx v2, 0, r9 +; CHECK-NEXT: lfs f0, 156(r1) +; CHECK-NEXT: lfs f1, 140(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 136(r1) +; CHECK-NEXT: stfs f0, 172(r1) +; CHECK-NEXT: lfs f0, 152(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 132(r1) +; CHECK-NEXT: stfs f0, 168(r1) +; CHECK-NEXT: lfs f0, 148(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 128(r1) +; CHECK-NEXT: stfs f0, 164(r1) +; CHECK-NEXT: lfs f0, 144(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: stfs f0, 160(r1) +; CHECK-NEXT: lvx v2, 0, r10 +; CHECK-NEXT: stvx v2, 0, r11 +; CHECK-NEXT: ld r0, 112(r1) +; CHECK-NEXT: stdx r0, r29, r6 +; CHECK-NEXT: addi r6, r6, 16 +; CHECK-NEXT: ld r0, 120(r1) +; CHECK-NEXT: std r0, 8(r12) +; CHECK-NEXT: bdnz .LBB0_5 +; CHECK-NEXT: # %bb.6: # %middle.block +; CHECK-NEXT: cmpld r5, r4 +; CHECK-NEXT: beq cr0, .LBB0_9 +; CHECK-NEXT: .LBB0_7: # %for.body.preheader18 +; CHECK-NEXT: sldi r6, r5, 2 +; CHECK-NEXT: sub r5, r4, r5 +; CHECK-NEXT: addi r6, r6, -4 +; CHECK-NEXT: add r3, r3, r6 +; CHECK-NEXT: add r4, r29, r6 +; CHECK-NEXT: mtctr r5 +; CHECK-NEXT: .LBB0_8: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lfsu f0, 4(r4) +; CHECK-NEXT: lfsu 
f1, 4(r3) +; CHECK-NEXT: fdivs f0, f0, f1 +; CHECK-NEXT: stfs f0, 0(r4) +; CHECK-NEXT: bdnz .LBB0_8 +; CHECK-NEXT: .LBB0_9: # %for.end +; CHECK-NEXT: ld r30, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 208 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load float*, float** @a, align 8 + %call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2 + %call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2 + %cmp11 = icmp sgt i32 %call, 0 + br i1 %cmp11, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %call to i64 + %min.iters.check = icmp ult i32 %call, 4 + br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck + +vector.memcheck: ; preds = %for.body.preheader + %scevgep = getelementptr float, float* %0, i64 %wide.trip.count + %scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count + %bound0 = icmp ult float* %0, %scevgep15 + %bound1 = icmp ult float* %call1, %scevgep + %found.conflict = and i1 %bound0, %bound1 + br i1 %found.conflict, label %for.body.preheader18, label %vector.ph + +vector.ph: ; preds = %vector.memcheck + %n.vec = and i64 %wide.trip.count, 4294967292 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds float, float* %call1, i64 %index + %2 = bitcast float* %1 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %2, align 4 + %3 = getelementptr inbounds float, float* %0, i64 %index + %4 = bitcast float* %3 to <4 x float>* + %wide.load17 = load <4 x float>, <4 x float>* %4, align 4 + %5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load + %6 = bitcast float* %3 to <4 x float>* + store <4 x float> %5, <4 x float>* %6, align 4 + %index.next = add i64 %index, 4 + %7 = icmp eq i64 %index.next, 
%n.vec + br i1 %7, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count + br i1 %cmp.n, label %for.end, label %for.body.preheader18 + +for.body.preheader18: ; preds = %middle.block, %vector.memcheck, %for.body.preheader + %indvars.iv.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] + br label %for.body + +for.body: ; preds = %for.body.preheader18, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ] + %arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv + %8 = load float, float* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv + %9 = load float, float* %arrayidx3, align 4 + %div = fdiv reassoc nsz arcp afn float %9, %8 + store float %div, float* %arrayidx3, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %middle.block, %entry + ret void +} + +declare signext i32 @c(...) local_unnamed_addr #1 + +declare float* @b(...) 
local_unnamed_addr #1 + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll new file mode 100644 index 0000000000000..c13d181519964 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s + +define signext i32 @foo(i32 signext %0, i32 signext %1, i32* %2, i32* %3, i32 signext %4) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpwi r7, 1 +; CHECK-NEXT: blt cr0, .LBB0_8 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addi r4, r5, -4 +; CHECK-NEXT: addi r8, r6, -4 +; CHECK-NEXT: clrldi r7, r7, 32 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: mtctr r7 +; CHECK-NEXT: lis r7, -30584 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: cmplwi r3, 3 +; CHECK-NEXT: cmplwi cr1, r3, 1 +; CHECK-NEXT: ori r7, r7, 34953 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: mulhwu r9, r6, r7 +; CHECK-NEXT: srwi r9, r9, 4 +; CHECK-NEXT: mulli r9, r9, 30 +; CHECK-NEXT: sub r9, r6, r9 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: add r9, r9, r5 +; CHECK-NEXT: stw r9, 4(r8) +; CHECK-NEXT: mr r8, r3 +; CHECK-NEXT: bdz .LBB0_8 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: lwzu r9, 4(r4) +; CHECK-NEXT: addi r3, r8, 4 +; CHECK-NEXT: add r5, r9, r5 +; CHECK-NEXT: beq cr0, .LBB0_7 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: bne cr1, .LBB0_2 +; CHECK-NEXT: # %bb.6: +; CHECK-NEXT: slwi r9, r6, 1 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_7: +; CHECK-NEXT: addi r9, r6, 100 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_8: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr + %6 = icmp sgt i32 %4, 0 + br i1 %6, label %7, label %9 + +7: ; preds = %5 + %8 = zext i32 %4 to i64 + br label %10 + 
+9: ; preds = %25, %5 + ret i32 undef + +10: ; preds = %7, %25 + %11 = phi i64 [ 0, %7 ], [ %29, %25 ] + %12 = phi i32 [ 0, %7 ], [ %30, %25 ] + %13 = phi i32 [ 0, %7 ], [ %16, %25 ] + %14 = getelementptr inbounds i32, i32* %2, i64 %11 + %15 = load i32, i32* %14, align 4 + %16 = add nsw i32 %15, %13 + switch i32 %0, label %22 [ + i32 1, label %17 + i32 3, label %20 + ] + +17: ; preds = %10 + %18 = trunc i64 %11 to i32 + %19 = shl i32 %18, 1 + br label %25 + +20: ; preds = %10 + %21 = add nuw nsw i32 %12, 100 + br label %25 + +22: ; preds = %10 + %23 = trunc i64 %11 to i32 + %24 = urem i32 %23, 30 + br label %25 + +25: ; preds = %22, %20, %17 + %26 = phi i32 [ %24, %22 ], [ %21, %20 ], [ %19, %17 ] + %27 = add nsw i32 %26, %16 + %28 = getelementptr inbounds i32, i32* %3, i64 %11 + store i32 %27, i32* %28, align 4 + %29 = add nuw nsw i64 %11, 1 + %30 = add nuw nsw i32 %12, 1 + %31 = icmp eq i64 %29, %8 + br i1 %31, label %9, label %10 +} + diff --git a/llvm/test/CodeGen/SPARC/fshl.ll b/llvm/test/CodeGen/SPARC/fshl.ll new file mode 100644 index 0000000000000..d841619457249 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/fshl.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s + +define <2 x i64> @fshl_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: fshl_v2i64: +; CHECK: .cfi_startproc +; CHECK-NEXT: .register %g2, #scratch +; CHECK-NEXT: .register %g3, #scratch +; CHECK-NEXT: ! %bb.0: ! 
%bb +; CHECK-NEXT: mov 63, %g2 +; CHECK-NEXT: andn %g2, %o4, %g3 +; CHECK-NEXT: srlx %o2, 1, %o2 +; CHECK-NEXT: srlx %o2, %g3, %o2 +; CHECK-NEXT: and %o4, 63, %o4 +; CHECK-NEXT: sllx %o0, %o4, %o0 +; CHECK-NEXT: or %o0, %o2, %o0 +; CHECK-NEXT: andn %g2, %o5, %o2 +; CHECK-NEXT: srlx %o3, 1, %o3 +; CHECK-NEXT: srlx %o3, %o2, %o2 +; CHECK-NEXT: and %o5, 63, %o3 +; CHECK-NEXT: sllx %o1, %o3, %o1 +; CHECK-NEXT: retl +; CHECK-NEXT: or %o1, %o2, %o1 +bb: + %i = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) + ret <2 x i64> %i +} + +define i32 @PR47303() { +; CHECK-LABEL: PR47303: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: ! %bb +; CHECK-NEXT: retl +; CHECK-NEXT: mov 0, %o0 +bb: + %i = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> ) + %i1 = add <4 x i64> %i, zeroinitializer + %i2 = add <4 x i64> %i1, zeroinitializer + %i3 = extractelement <4 x i64> %i2, i32 0 + %i4 = add i64 0, %i3 + %i5 = xor i64 0, %i4 + %i6 = trunc i64 %i5 to i32 + %i7 = mul i32 %i6, 797982799 + ret i32 %i7 +} + +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll new file mode 100644 index 0000000000000..8bab2135739c4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Check that a multiply-and-add results. 
+ +; FIXME: This test is xfailed temporarily +; XFAIL: * + +define void @f1(float %arg, float* %Dst) { +; CHECK-LABEL: f1: +; CHECK: maeb +bb: + %i = fmul contract float %arg, 0xBE6777A5C0000000 + %i4 = fadd contract float %i, 1.000000e+00 + %i5 = fmul contract float %arg, 0xBE6777A5C0000000 + %i6 = fadd contract float %i5, 1.000000e+00 + %i7 = fmul contract float %i4, 2.000000e+00 + store float %i7, float* %Dst + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-60.ll b/llvm/test/CodeGen/SystemZ/int-cmp-60.ll new file mode 100644 index 0000000000000..faae4f9bced23 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-60.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that DAGCombiner properly clears the NUW/NSW flags on the memoized add +; node. + +define void @fun(i64* %Src, i32* %Dst) { +; CHECK-LABEL: fun: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iilf %r0, 1303940520 +; CHECK-NEXT: n %r0, 4(%r2) +; CHECK-NEXT: lr %r1, %r0 +; CHECK-NEXT: afi %r1, 1628135358 +; CHECK-NEXT: locrnhe %r1, %r0 +; CHECK-NEXT: st %r1, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %0 = load i64, i64* %Src, align 8 + %1 = trunc i64 %0 to i32 + %conv = and i32 %1, 1303940520 + %xor11.i = or i32 %conv, -2147483648 + %xor2.i = add i32 %xor11.i, -519348290 + %cmp.i = icmp slt i32 %xor2.i, 0 + %sub3.i = add nuw nsw i32 %conv, 1628135358 + %cond.i = select i1 %cmp.i, i32 %conv, i32 %sub3.i + store i32 %cond.i, i32* %Dst + ret void +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index 6204c06303432..829aabf4b35ca 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -14,7 +14,7 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: 
vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 -; CHECK: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q2, q1 @@ -30,6 +30,13 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: str.w r0, [r2, r1, lsl #2] ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 4294967228 @ 0xffffffbc +; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 +; CHECK-NEXT: .long 4294967268 @ 0xffffffe4 +; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 entry: ; preds = %middle. %add.us.us = add i32 4, %n %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us @@ -79,7 +86,7 @@ define dso_local void @mve_gatherscatter_offset(i32* noalias nocapture readonly ; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vmov.i32 q0, #0x14 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q3, q2 @@ -154,7 +161,7 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q3, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll index fffafa16c6da2..944505321b244 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll @@ -357,3 +357,269 @@ entry: store <2 x i1> %c, <2 x i1>* %dst ret void } + +define arm_aapcs_vfpcc <4 x i32> @load_predcastzext(i16* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_predcastzext: +; CHECK-LE: @ %bb.0: 
+; CHECK-LE-NEXT: ldrh r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_predcastzext: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldrh r0, [r0] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i16, i16* %i, align 4 + %lz = zext i16 %l to i32 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %lz) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr p0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr p0, [r0] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <8 x i16> @load_predcast8(i32* %i, <8 x i16> %a) { +; CHECK-LE-LABEL: load_predcast8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr p0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_predcast8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vldr p0, [r0] +; CHECK-BE-NEXT: vrev32.16 q0, q0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 4 + %c = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %l) + %s 
= select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <16 x i8> @load_predcast16(i32* %i, <16 x i8> %a) { +; CHECK-LE-LABEL: load_predcast16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr p0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_predcast16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vldr p0, [r0] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.8 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 4 + %c = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %l) + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_align2(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_align2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_align2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 2 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(i16* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_offset: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: adds r0, #6 +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vldr p0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_offset: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: adds r0, #6 +; CHECK-BE-NEXT: 
vrev64.32 q1, q0 +; CHECK-BE-NEXT: vldr p0, [r0] +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i16, i16* %i, i32 3 + %gb = bitcast i16* %g to i32* + %l = load i32, i32* %gb, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr p0, [r0, #4] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr p0, [r0, #4] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 1 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr p0, [r0, #508] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr p0, [r0, #508] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 127 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define 
arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr p0, [r0, #-508] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr p0, [r0, #-508] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 -127 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range3: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: add.w r0, r0, #512 +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vldr p0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range3: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: add.w r0, r0, #512 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vldr p0, [r0] +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 128 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range5(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range5: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: sub.w r0, r0, #512 +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vldr p0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range5: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: sub.w r0, r0, #512 +; 
CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vldr p0, [r0] +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 -128 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index 5669fdf38fee0..ed7e84a899d24 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -24,7 +24,7 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; CHECK-NEXT: vmov.i32 q3, #0x4 ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB0_1: @ %do.body +; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: sub.w r12, r12, #4 @@ -48,6 +48,15 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; CHECK-NEXT: vstr s8, [r2] ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 entry: %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 1) %1 = extractvalue { <4 x i32>, i32 } %0, 0 diff --git a/llvm/test/CodeGen/WebAssembly/pr47375.ll b/llvm/test/CodeGen/WebAssembly/pr47375.ll new file 
mode 100644 index 0000000000000..4c04631f26b11 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/pr47375.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; Regression test for pr47375, in which an assertion was triggering +; because WebAssemblyTargetLowering::isVectorLoadExtDesirable was +; improperly assuming the use of simple value types. + +define void @sext_vec() { +; CHECK-LABEL: sext_vec: +; CHECK: .functype sext_vec () -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.load8_u 0 +; CHECK-NEXT: local.set 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.store8 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i32.shl +; CHECK-NEXT: i32.or +; CHECK-NEXT: i32.const 7175 +; CHECK-NEXT: i32.and +; CHECK-NEXT: i32.store16 0 +; CHECK-NEXT: # fallthrough-return + %L1 = load <2 x i3>, <2 x i3>* undef, align 2 + %zext = zext <2 x i3> %L1 to <2 x i10> + store <2 x i10> %zext, <2 x i10>* undef, align 4 + ret void +} diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 345830676abaa..63faafc10ec8d 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -132,13 +132,14 @@ define i64 @test_i64(i64 %a) nounwind { define i128 @test_i128(i128 %a) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: sbbq %rsi, %rdx -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: cmovnsq %rdi, %rax -; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx ; 
X64-NEXT: retq ; ; X86-LABEL: test_i128: diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll index f29891e6f8a3a..0fe9d0b0d35c8 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -10,13 +10,11 @@ define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a0) { ; CHECK-NEXT: vpabsb %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %arg = bitcast <4 x i64> %a0 to <32 x i8> - %sub = sub <32 x i8> zeroinitializer, %arg - %cmp = icmp sgt <32 x i8> %arg, zeroinitializer - %sel = select <32 x i1> %cmp, <32 x i8> %arg, <32 x i8> %sub - %res = bitcast <32 x i8> %sel to <4 x i64> + %abs = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %arg, i1 false) + %res = bitcast <32 x i8> %abs to <4 x i64> ret <4 x i64> %res } -declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1) nounwind readnone define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) { ; CHECK-LABEL: test_mm256_abs_epi16: @@ -24,13 +22,11 @@ define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) { ; CHECK-NEXT: vpabsw %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %arg = bitcast <4 x i64> %a0 to <16 x i16> - %sub = sub <16 x i16> zeroinitializer, %arg - %cmp = icmp sgt <16 x i16> %arg, zeroinitializer - %sel = select <16 x i1> %cmp, <16 x i16> %arg, <16 x i16> %sub - %res = bitcast <16 x i16> %sel to <4 x i64> + %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false) + %res = bitcast <16 x i16> %abs to <4 x i64> ret <4 x i64> %res } -declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1) nounwind readnone define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) { ; CHECK-LABEL: test_mm256_abs_epi32: @@ -38,13 +34,11 @@ define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) { ; CHECK-NEXT: vpabsd %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %arg = bitcast <4 x i64> %a0 to <8 x i32> - 
%sub = sub <8 x i32> zeroinitializer, %arg - %cmp = icmp sgt <8 x i32> %arg, zeroinitializer - %sel = select <8 x i1> %cmp, <8 x i32> %arg, <8 x i32> %sub - %res = bitcast <8 x i32> %sel to <4 x i64> + %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %arg, i1 false) + %res = bitcast <8 x i32> %abs to <4 x i64> ret <4 x i64> %res } -declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1) nounwind readnone define <4 x i64> @test_mm256_add_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; CHECK-LABEL: test_mm256_add_epi8: diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index 95a2b7e392ba5..70d627b6ed0f5 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -1,24 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL ; fold (abs c1) -> c2 define <4 x i32> @combine_v4i32_abs_constant() { -; CHECK-LABEL: 
combine_v4i32_abs_constant: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648] -; CHECK-NEXT: retq - %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> ) +; SSE-LABEL: combine_v4i32_abs_constant: +; SSE: # %bb.0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,1,3,2147483648] +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v4i32_abs_constant: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648] +; AVX-NEXT: retq + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> , i1 false) ret <4 x i32> %1 } define <16 x i16> @combine_v16i16_abs_constant() { -; CHECK-LABEL: combine_v16i16_abs_constant: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0] -; CHECK-NEXT: retq - %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> ) +; SSE-LABEL: combine_v16i16_abs_constant: +; SSE: # %bb.0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,1,1,3,3,7,7,255] +; SSE-NEXT: movaps {{.*#+}} xmm1 = [255,4096,4096,32767,32767,32768,32768,0] +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v16i16_abs_constant: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0] +; AVX-NEXT: retq + %1 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> , i1 false) ret <16 x i16> %1 } @@ -40,11 +53,24 @@ define i32 @combine_i32_abs_abs(i32 %a) { } define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) { -; CHECK-LABEL: combine_v8i16_abs_abs: -; CHECK: # %bb.0: -; CHECK-NEXT: vpabsw %xmm0, %xmm0 -; CHECK-NEXT: retq - %a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a) +; SSE2-LABEL: combine_v8i16_abs_abs: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: paddw %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v8i16_abs_abs: +; SSE42: # %bb.0: +; SSE42-NEXT: pabsw %xmm0, %xmm0 +; SSE42-NEXT: retq +; +; AVX-LABEL: combine_v8i16_abs_abs: +; AVX: # %bb.0: +; AVX-NEXT: vpabsw %xmm0, 
%xmm0 +; AVX-NEXT: retq + %a1 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 false) %s2 = ashr <8 x i16> %a1, %a2 = add <8 x i16> %a1, %s2 %x2 = xor <8 x i16> %a2, %s2 @@ -52,18 +78,63 @@ define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) { } define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) { -; CHECK-LABEL: combine_v32i8_abs_abs: -; CHECK: # %bb.0: -; CHECK-NEXT: vpabsb %ymm0, %ymm0 -; CHECK-NEXT: retq +; SSE2-LABEL: combine_v32i8_abs_abs: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm0, %xmm3 +; SSE2-NEXT: paddb %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v32i8_abs_abs: +; SSE42: # %bb.0: +; SSE42-NEXT: pabsb %xmm0, %xmm0 +; SSE42-NEXT: pabsb %xmm1, %xmm1 +; SSE42-NEXT: retq +; +; AVX-LABEL: combine_v32i8_abs_abs: +; AVX: # %bb.0: +; AVX-NEXT: vpabsb %ymm0, %ymm0 +; AVX-NEXT: retq %n1 = sub <32 x i8> zeroinitializer, %a %b1 = icmp slt <32 x i8> %a, zeroinitializer %a1 = select <32 x i1> %b1, <32 x i8> %n1, <32 x i8> %a - %a2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a1) + %a2 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a1, i1 false) ret <32 x i8> %a2 } define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { +; SSE2-LABEL: combine_v4i64_abs_abs: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: paddq %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: paddq %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v4i64_abs_abs: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: pxor %xmm3, %xmm3 +; SSE42-NEXT: pxor %xmm4, %xmm4 +; SSE42-NEXT: psubq %xmm0, %xmm4 +; 
SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE42-NEXT: psubq %xmm1, %xmm3 +; SSE42-NEXT: movdqa %xmm1, %xmm0 +; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE42-NEXT: movapd %xmm2, %xmm0 +; SSE42-NEXT: retq +; ; AVX2-LABEL: combine_v4i64_abs_abs: ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -93,31 +164,42 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { ; fold (abs x) -> x iff not-negative define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) { -; CHECK-LABEL: combine_v16i8_abs_constant: -; CHECK: # %bb.0: -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: combine_v16i8_abs_constant: +; SSE: # %bb.0: +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v16i8_abs_constant: +; AVX: # %bb.0: +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %1 = insertelement <16 x i8> undef, i8 15, i32 0 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer %3 = and <16 x i8> %a, %2 - %4 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %3) + %4 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %3, i1 false) ret <16 x i8> %4 } define <8 x i32> @combine_v8i32_abs_pos(<8 x i32> %a) { -; CHECK-LABEL: combine_v8i32_abs_pos: -; CHECK: # %bb.0: -; CHECK-NEXT: vpsrld $1, %ymm0, %ymm0 -; CHECK-NEXT: retq +; SSE-LABEL: combine_v8i32_abs_pos: +; SSE: # %bb.0: +; SSE-NEXT: psrld $1, %xmm0 +; SSE-NEXT: psrld $1, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v8i32_abs_pos: +; AVX: # %bb.0: +; AVX-NEXT: vpsrld $1, %ymm0, %ymm0 +; AVX-NEXT: retq %1 = lshr <8 x i32> %a, - %2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %1) + %2 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %1, i1 false) ret <8 x i32> %2 } -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, 
i1) nounwind readnone +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone -declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone -declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone -declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1) nounwind readnone +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1) nounwind readnone +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1) nounwind readnone diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll index cc4dee33c619a..7039e33c00935 100644 --- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll +++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) { @@ -27,28 +28,43 @@ define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) { } define <2 x double> @test2_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test2_x86_sse41_blend_pd: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test2_x86_sse41_blend_pd: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test2_x86_sse41_blend_pd: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 -1) ret <2 x double> %1 } define <4 x float> @test2_x86_sse41_blend_ps(<4 x float> %a0, 
<4 x float> %a1) { -; CHECK-LABEL: test2_x86_sse41_blend_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test2_x86_sse41_blend_ps: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test2_x86_sse41_blend_ps: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 -1) ret <4 x float> %1 } define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: test2_x86_sse41_pblend_w: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test2_x86_sse41_pblend_w: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test2_x86_sse41_pblend_w: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1) ret <8 x i16> %1 } @@ -78,13 +94,18 @@ define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) { } define double @demandedelts_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { -; CHECK-LABEL: demandedelts_blendvpd: -; CHECK: # %bb.0: -; CHECK-NEXT: movapd %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movapd %xmm3, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedelts_blendvpd: +; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedelts_blendvpd: +; AVX: # %bb.0: +; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer %2 = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer %3 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer @@ -94,13 +115,18 @@ define double 
@demandedelts_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x do } define float @demandedelts_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: demandedelts_blendvps: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvps %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movaps %xmm3, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedelts_blendvps: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedelts_blendvps: +; AVX: # %bb.0: +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer %2 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer %3 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> zeroinitializer @@ -110,15 +136,22 @@ define float @demandedelts_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float } define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { -; CHECK-LABEL: demandedelts_pblendvb: -; CHECK: # %bb.0: -; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: movdqa %xmm2, %xmm0 -; CHECK-NEXT: pblendvb %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: pxor %xmm0, %xmm0 -; CHECK-NEXT: pshufb %xmm0, %xmm3 -; CHECK-NEXT: movdqa %xmm3, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedelts_pblendvb: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pshufb %xmm0, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedelts_pblendvb: +; AVX: # %bb.0: +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> 
zeroinitializer %2 = shufflevector <16 x i8> %a1, <16 x i8> undef, <16 x i32> zeroinitializer %3 = shufflevector <16 x i8> %a2, <16 x i8> undef, <16 x i32> zeroinitializer @@ -128,19 +161,32 @@ define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> } define <2 x i64> @demandedbits_blendvpd(i64 %a0, i64 %a2, <2 x double> %a3) { -; CHECK-LABEL: demandedbits_blendvpd: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq $1, %rax -; CHECK-NEXT: orq $4, %rdi -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: movq %rdi, %xmm2 -; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero -; CHECK-NEXT: movq {{.*#+}} xmm2 = xmm2[0],zero -; CHECK-NEXT: blendvpd %xmm0, %xmm2, %xmm1 -; CHECK-NEXT: psrlq $11, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedbits_blendvpd: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: orq $1, %rax +; SSE-NEXT: orq $4, %rdi +; SSE-NEXT: movq %rax, %xmm1 +; SSE-NEXT: movq %rdi, %xmm2 +; SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero +; SSE-NEXT: movq {{.*#+}} xmm2 = xmm2[0],zero +; SSE-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE-NEXT: psrlq $11, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedbits_blendvpd: +; AVX: # %bb.0: +; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: orq $1, %rax +; AVX-NEXT: orq $4, %rdi +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vmovq %rdi, %xmm2 +; AVX-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero +; AVX-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero +; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; AVX-NEXT: vpsrlq $11, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = or i64 %a0, 1 %2 = or i64 %a0, 4 %3 = bitcast i64 %1 to double @@ -154,26 +200,36 @@ define <2 x i64> @demandedbits_blendvpd(i64 %a0, i64 %a2, <2 x double> %a3) { } define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { -; CHECK-LABEL: xor_pblendvb: -; CHECK: # %bb.0: -; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: pblendvb 
%xmm0, %xmm3, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: xor_pblendvb: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: xor_pblendvb: +; AVX: # %bb.0: +; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %1 = xor <16 x i8> %a2, %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1) ret <16 x i8> %2 } define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: xor_blendvps: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvps %xmm0, %xmm3, %xmm1 -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: xor_blendvps: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvps %xmm0, %xmm3, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: xor_blendvps: +; AVX: # %bb.0: +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %1 = bitcast <4 x float> %a2 to <4 x i32> %2 = xor <4 x i32> %1, %3 = bitcast <4 x i32> %2 to <4 x float> @@ -182,13 +238,18 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> % } define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { -; CHECK-LABEL: xor_blendvpd: -; CHECK: # %bb.0: -; CHECK-NEXT: movapd %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvpd %xmm0, %xmm3, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: xor_blendvpd: +; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: xor_blendvpd: +; AVX: # %bb.0: +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %1 = 
bitcast <2 x double> %a2 to <4 x i32> %2 = xor <4 x i32> %1, %3 = bitcast <4 x i32> %2 to <2 x double> @@ -196,6 +257,24 @@ define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ret <2 x double> %4 } +define <16 x i8> @PR47404(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { +; SSE-LABEL: PR47404: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: PR47404: +; AVX: # %bb.0: +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %4 = icmp sgt <16 x i8> %2, + %5 = select <16 x i1> %4, <16 x i8> %0, <16 x i8> %1 + ret <16 x i8> %5 +} + declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 338e66622dcd9..f052718d98400 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -120,3 +120,89 @@ define i64 @test_i64(i64 %a) nounwind { ret i64 %abs } +define i128 @test_i128(i128 %a) nounwind { +; X86-NO-CMOV-LABEL: test_i128: +; X86-NO-CMOV: # %bb.0: +; X86-NO-CMOV-NEXT: pushl %ebp +; X86-NO-CMOV-NEXT: pushl %ebx +; X86-NO-CMOV-NEXT: pushl %edi +; X86-NO-CMOV-NEXT: pushl %esi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-CMOV-NEXT: xorl %ecx, %ecx +; X86-NO-CMOV-NEXT: negl %ebp +; X86-NO-CMOV-NEXT: movl $0, %ebx +; X86-NO-CMOV-NEXT: sbbl %edx, %ebx +; X86-NO-CMOV-NEXT: movl $0, %edi +; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NO-CMOV-NEXT: sbbl %esi, %ecx +; X86-NO-CMOV-NEXT: testl %esi, %esi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: js .LBB4_2 +; X86-NO-CMOV-NEXT: # %bb.1: +; 
X86-NO-CMOV-NEXT: movl %esi, %ecx +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NO-CMOV-NEXT: movl %edx, %ebx +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NO-CMOV-NEXT: .LBB4_2: +; X86-NO-CMOV-NEXT: movl %ebp, (%eax) +; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax) +; X86-NO-CMOV-NEXT: movl %edi, 8(%eax) +; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax) +; X86-NO-CMOV-NEXT: popl %esi +; X86-NO-CMOV-NEXT: popl %edi +; X86-NO-CMOV-NEXT: popl %ebx +; X86-NO-CMOV-NEXT: popl %ebp +; X86-NO-CMOV-NEXT: retl $4 +; +; X86-CMOV-LABEL: test_i128: +; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebp +; X86-CMOV-NEXT: pushl %ebx +; X86-CMOV-NEXT: pushl %edi +; X86-CMOV-NEXT: pushl %esi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-CMOV-NEXT: xorl %esi, %esi +; X86-CMOV-NEXT: negl %edi +; X86-CMOV-NEXT: movl $0, %ebx +; X86-CMOV-NEXT: sbbl %edx, %ebx +; X86-CMOV-NEXT: movl $0, %ebp +; X86-CMOV-NEXT: sbbl %ecx, %ebp +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CMOV-NEXT: sbbl %eax, %esi +; X86-CMOV-NEXT: testl %eax, %eax +; X86-CMOV-NEXT: cmovnsl %eax, %esi +; X86-CMOV-NEXT: cmovnsl %ecx, %ebp +; X86-CMOV-NEXT: cmovnsl %edx, %ebx +; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CMOV-NEXT: movl %edi, (%eax) +; X86-CMOV-NEXT: movl %ebx, 4(%eax) +; X86-CMOV-NEXT: movl %ebp, 8(%eax) +; X86-CMOV-NEXT: movl %esi, 12(%eax) +; X86-CMOV-NEXT: popl %esi +; X86-CMOV-NEXT: popl %edi +; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: popl %ebp +; X86-CMOV-NEXT: retl $4 +; +; X64-LABEL: test_i128: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx +; X64-NEXT: retq + %tmp1neg = sub i128 0, %a + %b = icmp sgt i128 %a, -1 + %abs = select i1 %b, i128 
%a, i128 %tmp1neg + ret i128 %abs +} + diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index 6a9a401264c56..67d8479cf7365 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -299,11 +299,11 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) { ; ; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7] -; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15] -; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [32,1,2,3,4,5,38,7,8,9,10,11,12,13,14,47] +; AVX512F-NEXT: vpermt2w %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx: diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index c5781e8340753..88418fd85fe52 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -3323,14 +3323,13 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3 define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) { ; KNL_64-LABEL: splat_ptr_gather: ; KNL_64: # %bb.0: -; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 +; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL_64-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_64-NEXT: kshiftlw $12, %k0, %k0 ; KNL_64-NEXT: kshiftrw $12, %k0, %k1 -; KNL_64-NEXT: vmovq %rdi, %xmm0 -; KNL_64-NEXT: 
vpbroadcastq %xmm0, %ymm0 -; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm1 {%k1} +; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} ; KNL_64-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -3342,8 +3341,9 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthr ; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_32-NEXT: kshiftlw $12, %k0, %k0 ; KNL_32-NEXT: kshiftrw $12, %k0, %k1 -; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1} +; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} ; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -3352,18 +3352,18 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthr ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vpmovd2m %xmm0, %k1 -; SKX-NEXT: vpbroadcastq %rdi, %ymm0 -; SKX-NEXT: vpgatherqd (,%ymm0), %xmm1 {%k1} +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1} ; SKX-NEXT: vmovdqa %xmm1, %xmm0 -; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; SKX_32-LABEL: splat_ptr_gather: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX_32-NEXT: vpmovd2m %xmm0, %k1 -; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; SKX_32-NEXT: vpgatherdd (,%xmm0), %xmm1 {%k1} +; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1} ; SKX_32-NEXT: vmovdqa %xmm1, %xmm0 ; SKX_32-NEXT: retl %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 @@ -3376,14 +3376,13 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; KNL_64-LABEL: splat_ptr_scatter: ; KNL_64: # %bb.0: -; KNL_64-NEXT: # kill: def $xmm1 killed 
$xmm1 def $ymm1 +; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL_64-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_64-NEXT: kshiftlw $12, %k0, %k0 ; KNL_64-NEXT: kshiftrw $12, %k0, %k1 -; KNL_64-NEXT: vmovq %rdi, %xmm0 -; KNL_64-NEXT: vpbroadcastq %xmm0, %ymm0 -; KNL_64-NEXT: vpscatterqd %ymm1, (,%zmm0) {%k1} +; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; @@ -3394,8 +3393,9 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_32-NEXT: kshiftlw $12, %k0, %k0 ; KNL_32-NEXT: kshiftrw $12, %k0, %k1 -; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; KNL_32-NEXT: vpscatterdd %zmm1, (,%zmm0) {%k1} +; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -3403,17 +3403,17 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vpmovd2m %xmm0, %k1 -; SKX-NEXT: vpbroadcastq %rdi, %ymm0 -; SKX-NEXT: vpscatterqd %xmm1, (,%ymm0) {%k1} -; SKX-NEXT: vzeroupper +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1} ; SKX-NEXT: retq ; ; SKX_32-LABEL: splat_ptr_scatter: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX_32-NEXT: vpmovd2m %xmm0, %k1 -; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; SKX_32-NEXT: vpscatterdd %xmm1, (,%xmm0) {%k1} +; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX_32-NEXT: vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1} ; SKX_32-NEXT: retl %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll 
b/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll index fc7ce73a441e8..7d1987c1f6a74 100644 --- a/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll +++ b/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -early-cse < %s -S | FileCheck %s +; RUN: opt -mtriple=x86_64-- -early-cse -earlycse-debug-hash < %s -S | FileCheck %s ; CHECK: @foo(x86_mmx bitcast (double 0.000000e+00 to x86_mmx)) diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index f5f8781d80213..732395ee0f2dd 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -1132,28 +1132,50 @@ define void @interleave_24i16_in(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2, ; AVX1-NEXT: vmovdqu %xmm2, 16(%rdi) ; AVX1-NEXT: retq ; -; AVX2-LABEL: interleave_24i16_in: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqu (%rsi), %xmm0 -; AVX2-NEXT: vmovdqu (%rdx), %xmm1 -; AVX2-NEXT: vmovdqu (%rcx), %xmm2 -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 -; AVX2-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,u,u,6,7,2,3,u,u,8,9,4,5,u,u,16,17,u,u,22,23,18,19,u,u,24,25,20,21,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27] -; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = -; AVX2-NEXT: vpermd %ymm2, %ymm4, %ymm4 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255] -; AVX2-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 -; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u] -; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7] -; AVX2-NEXT: vmovdqu %xmm0, 32(%rdi) -; AVX2-NEXT: vmovdqu %ymm3, (%rdi) -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: interleave_24i16_in: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovdqu (%rsi), %xmm0 +; AVX2-SLOW-NEXT: vmovdqu (%rdx), %xmm1 +; AVX2-SLOW-NEXT: vmovdqu (%rcx), %xmm2 +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,u,u,6,7,2,3,u,u,8,9,4,5,u,u,16,17,u,u,22,23,18,19,u,u,24,25,20,21,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15] +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = +; AVX2-SLOW-NEXT: vpermd %ymm2, %ymm4, %ymm4 +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255] +; AVX2-SLOW-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7] +; AVX2-SLOW-NEXT: vmovdqu %xmm0, 32(%rdi) +; AVX2-SLOW-NEXT: vmovdqu %ymm3, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: interleave_24i16_in: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqu (%rsi), %xmm0 +; AVX2-FAST-NEXT: vmovdqu (%rdx), %xmm1 +; AVX2-FAST-NEXT: vmovdqu (%rcx), %xmm2 +; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = +; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm4 +; 
AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [0,4,1,5,1,5,2,6] +; AVX2-FAST-NEXT: vpermd %ymm3, %ymm5, %ymm3 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,1,4,5,u,u,2,3,6,7,u,u,8,9,12,13,u,u,18,19,22,23,u,u,24,25,28,29,u,u,26,27] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255] +; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX2-FAST-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u] +; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] +; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7] +; AVX2-FAST-NEXT: vmovdqu %xmm0, 32(%rdi) +; AVX2-FAST-NEXT: vmovdqu %ymm3, (%rdi) +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq ; ; XOP-LABEL: interleave_24i16_in: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll index 51df4c0505b54..3f6b85c97c400 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll @@ -71,13 +71,12 @@ define void @shuffle_v16i16_to_v8i16_1(<16 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v8i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [1,3,5,7,33,35,37,39] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, 
%zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v8i16_1: @@ -252,13 +251,12 @@ define void @shuffle_v16i16_to_v4i16_1(<16 x i16>* %L, <4 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [1,5,33,37,4,5,36,37] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovq %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16_1: @@ -329,13 +327,12 @@ define void @shuffle_v16i16_to_v4i16_2(<16 x i16>* %L, <4 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16_2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [2,6,34,38,2,3,34,35] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovq %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16_2: @@ -406,13 +403,12 @@ define void @shuffle_v16i16_to_v4i16_3(<16 x i16>* %L, <4 x i16>* %S) nounwind 
{ ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16_3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [3,7,35,39,2,3,34,35] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovq %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16_3: diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll index 40cd2fcd4fdeb..d789f0e1d39f7 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll @@ -31,13 +31,13 @@ define void @shuffle_v64i8_to_v32i8_1(<64 x i8>* %L, <32 x i8>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v64i8_to_v32i8_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u] -; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512BW-NEXT: vmovdqa %ymm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,9,11] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm2[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %ymm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -85,9 +85,10 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind ; ; AVX512BW-LABEL: shuffle_v32i16_to_v16i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,17,19,21,23,9,11,13,15,25,27,29,31] +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,33,35,37,39,9,11,13,15,41,43,45,47] ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm1[0,2,1,3] ; AVX512BW-NEXT: vmovdqa %ymm0, (%rsi) ; AVX512BW-NEXT: vzeroupper @@ -128,10 +129,11 @@ define void @shuffle_v16i32_to_v8i32_1(<16 x i32>* %L, <8 x i32>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i32_to_v8i32_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovaps (%rdi), %ymm0 -; AVX512BW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],mem[1,3],ymm0[5,7],mem[5,7] -; AVX512BW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512BW-NEXT: vmovaps %ymm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,17,19,21,23] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %ymm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -258,9 +260,10 @@ define void @shuffle_v32i16_to_v8i16_1(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <1,5,9,13,33,37,41,45,u,u,u,u,u,u,u,u> ; 
AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -316,9 +319,10 @@ define void @shuffle_v32i16_to_v8i16_2(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <2,6,10,14,18,22,26,30,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <2,6,10,14,34,38,42,46,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -374,9 +378,10 @@ define void @shuffle_v32i16_to_v8i16_3(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <3,7,11,15,19,23,27,31,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <3,7,11,15,35,39,43,47,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index e4be8f5a273be..e6821daa97ca3 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -327,8 +327,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = 
<1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -412,8 +412,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -455,9 +455,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512BW-LABEL: PR34175: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512BW-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512BW-NEXT: retq @@ -473,9 +474,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512VBMI-LABEL: PR34175: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 -; AVX512VBMI-NEXT: vpermt2w %zmm0, %zmm0, 
%zmm1 +; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2 +; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512VBMI-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512VBMI-NEXT: retq diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll index b0529640eb1d2..6540313a891eb 100644 --- a/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll @@ -19,13 +19,11 @@ define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) { ; AVX-NEXT: vpabsb %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <16 x i8> - %sub = sub <16 x i8> zeroinitializer, %arg - %cmp = icmp sgt <16 x i8> %arg, zeroinitializer - %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub - %res = bitcast <16 x i8> %sel to <2 x i64> + %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %arg, i1 false) + %res = bitcast <16 x i8> %abs to <2 x i64> ret <2 x i64> %res } -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_abs_epi16: @@ -38,13 +36,11 @@ define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) { ; AVX-NEXT: vpabsw %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <8 x i16> - %sub = sub <8 x i16> zeroinitializer, %arg - %cmp = icmp sgt <8 x i16> %arg, zeroinitializer - %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub - %res = bitcast <8 x i16> %sel to <2 x i64> + %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false) + %res = bitcast <8 x i16> %abs to <2 x i64> ret <2 x i64> %res } -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) { ; SSE-LABEL: 
test_mm_abs_epi32: @@ -57,13 +53,11 @@ define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) { ; AVX-NEXT: vpabsd %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <4 x i32> - %sub = sub <4 x i32> zeroinitializer, %arg - %cmp = icmp sgt <4 x i32> %arg, zeroinitializer - %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub - %res = bitcast <4 x i32> %sel to <2 x i64> + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false) + %res = bitcast <4 x i32> %abs to <2 x i64> ret <2 x i64> %res } -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_alignr_epi8: diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll new file mode 100644 index 0000000000000..8ea56d29b8d06 --- /dev/null +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s 
--check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { +; SSE-LABEL: test_x86_ssse3_pabs_b_128: +; SSE: ## %bb.0: +; SSE-NEXT: pabsb %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1c,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_ssse3_pabs_b_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_ssse3_pabs_b_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { +; SSE-LABEL: test_x86_ssse3_pabs_d_128: +; SSE: ## %bb.0: +; SSE-NEXT: pabsd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1e,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_ssse3_pabs_d_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_ssse3_pabs_d_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { +; SSE-LABEL: test_x86_ssse3_pabs_w_128: +; SSE: ## %bb.0: +; SSE-NEXT: pabsw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1d,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: 
test_x86_ssse3_pabs_w_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_ssse3_pabs_w_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll index 629a759332a93..ac386abd17806 100644 --- a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll @@ -6,69 +6,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 -define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { -; SSE-LABEL: test_x86_ssse3_pabs_b_128: -; SSE: ## %bb.0: -; SSE-NEXT: pabsb %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1c,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_ssse3_pabs_b_128: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_ssse3_pabs_b_128: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone - 
- -define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { -; SSE-LABEL: test_x86_ssse3_pabs_d_128: -; SSE: ## %bb.0: -; SSE-NEXT: pabsd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1e,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_ssse3_pabs_d_128: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_ssse3_pabs_d_128: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone - - -define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { -; SSE-LABEL: test_x86_ssse3_pabs_w_128: -; SSE: ## %bb.0: -; SSE-NEXT: pabsw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1d,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_ssse3_pabs_w_128: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_ssse3_pabs_w_128: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone - - define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_ssse3_phadd_d_128: ; SSE: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll b/llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll new file mode 100644 index 0000000000000..d881b6cfae3b1 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=x86_64 -O0 < %s | FileCheck %s + +; Check that we don't crash on this input. +; CHECK-LABEL: @foo +; CHECK: __stack_chk_guard +; CHECK: retq +define hidden void @foo(i8** %ptr) #0 { +entry: + %args.addr = alloca i8*, align 8 + %0 = va_arg i8** %args.addr, i8* + store i8* %0, i8** %ptr + ret void +} + +attributes #0 = { sspstrong } +attributes #1 = { optsize } + diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index 85a086503410e..14dd43ed71a46 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -479,6 +479,36 @@ define void @trunc_i32_to_i16(i32 %x, i16* %p) { ret void } +define void @be_i32_to_i16(i32 %x, i16* %p0) { +; CHECK-LABEL: be_i32_to_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i32 %x, 16 + %t0 = trunc i32 %x to i16 + %t1 = trunc i32 %sh1 to i16 + %p1 = getelementptr inbounds i16, i16* %p0, i64 1 + store i16 %t0, i16* %p1, align 2 + store i16 %t1, i16* %p0, align 2 + ret void +} + +define void @be_i32_to_i16_order(i32 %x, i16* %p0) { +; CHECK-LABEL: be_i32_to_i16_order: +; CHECK: # %bb.0: +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i32 %x, 16 + %t0 = trunc i32 %x to i16 + %t1 = trunc i32 %sh1 to i16 + %p1 = getelementptr inbounds i16, i16* %p0, i64 1 + store i16 %t1, i16* %p0, align 2 + store i16 %t0, i16* %p1, align 2 + ret void +} + define void @trunc_i64_to_i8(i64 %x, i8* %p) { ; CHECK-LABEL: trunc_i64_to_i8: ; CHECK: # %bb.0: @@ -552,3 +582,33 @@ define void @trunc_i64_to_i32(i64 %x, i32* %p) { store i32 %t2, i32* %p1, align 4 ret void } + +define void @be_i64_to_i32(i64 %x, i32* %p0) { +; CHECK-LABEL: be_i64_to_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i64 %x, 32 
+ %t0 = trunc i64 %x to i32 + %t1 = trunc i64 %sh1 to i32 + %p1 = getelementptr inbounds i32, i32* %p0, i64 1 + store i32 %t0, i32* %p1, align 4 + store i32 %t1, i32* %p0, align 4 + ret void +} + +define void @be_i64_to_i32_order(i64 %x, i32* %p0) { +; CHECK-LABEL: be_i64_to_i32_order: +; CHECK: # %bb.0: +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i64 %x, 32 + %t0 = trunc i64 %x to i32 + %t1 = trunc i64 %sh1 to i32 + %p1 = getelementptr inbounds i32, i32* %p0, i64 1 + store i32 %t1, i32* %p0, align 4 + store i32 %t0, i32* %p1, align 4 + ret void +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index 1035dfa1e660a..ec775e9155721 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -393,10 +393,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: @@ -416,10 +414,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; 
XOPAVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0 -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -439,10 +435,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: @@ -462,10 +456,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -482,19 +474,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # %bb.0: @@ -513,9 +497,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -532,19 +515,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # %bb.0: @@ -563,9 +538,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd 
{{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -584,7 +558,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; @@ -605,7 +579,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -625,7 +599,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; @@ -646,7 +620,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: 
shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -666,7 +640,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; @@ -687,7 +661,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -707,8 +681,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: 
shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -728,8 +702,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3420,9 +3394,8 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1 ; ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: @@ -3441,9 +3414,8 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3548,22 +3520,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-FAST-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: retq ; ; AVX512VL-LABEL: 
shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: ; AVX512VL: # %bb.0: @@ -3582,11 +3543,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] -; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3616,10 +3574,9 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_1 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,u,u,16,17,16,17,16,17,16,17,24,25,24,25,24,25,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-FAST-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21] ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: @@ -3662,9 +3619,8 
@@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1 ; ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: @@ -3683,9 +3639,8 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3704,9 +3659,8 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1 ; ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: ; AVX2: # %bb.0: -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: @@ -3725,9 +3679,8 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3796,13 +3749,20 @@ define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: ; AVX512VL: # %bb.0: @@ -4018,13 +3978,20 @@ define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: ; AVX512VL: # %bb.0: @@ -4062,13 +4029,20 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,2,3,0,1,6,7,8,9,18,19,16,17,22,23,20,21,18,19,16,17,22,23,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: ; AVX512VL: # %bb.0: @@ -4106,13 +4080,20 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = 
<2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: ; AVX512VL: # %bb.0: @@ -4150,13 +4131,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: ; AVX512VL: # %bb.0: @@ -4194,13 +4182,20 @@ define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: ; AVX512VL: # %bb.0: @@ -4364,12 +4359,19 @@ define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: 
shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7] -; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,0,5,7,6,4,5] +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,2,3,6,7,10,11,0,1,4,5,14,15,16,17,16,17,18,19,22,23,26,27,16,17,20,21,30,31] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: ; AVX512VL: # %bb.0: @@ -4406,13 +4408,20 @@ define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = 
ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,4,5,4,5,4,5,8,9,16,17,16,17,20,21,20,21,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4450,13 +4459,20 @@ define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa 
{{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,0,1,0,1,0,1,8,9,16,17,16,17,20,21,20,21,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4494,13 +4510,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,4,5,4,5,8,9,16,17,20,21,20,21,16,17,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4538,13 +4561,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_0 ; AVX1-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,0,1,0,1,8,9,16,17,20,21,20,21,16,17,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: ; AVX512VL: # %bb.0: @@ -4593,9 +4623,9 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_1 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = 
ymm0[2,3,2,3] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,u,u,16,17,24,25,24,25,16,17,24,25,26,27,28,29,u,u] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,3,7,4,6,7,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,6,7,8,9,14,15,16,17,20,21,20,21,16,17,20,21,22,23,24,25,26,27] ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: @@ -4635,13 +4665,20 @@ define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,u,u,4,5,4,5,4,5,4,5,4,5,8,9,16,17,u,u,20,21,20,21,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4679,13 +4716,20 @@ define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,u,u,4,5,0,1,0,1,0,1,8,9,16,17,16,17,u,u,20,21,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4723,13 +4767,20 @@ define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: 
shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,0,1,4,5,0,1,0,1,0,1,8,9,u,u,16,17,16,17,20,21,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4800,13 +4851,13 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1 ; ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: ; AVX512VL: # 
%bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,8,9,4,5,6,11,12,13,8,9,12,13,14,11] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; @@ -4821,8 +4872,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -4984,12 +5035,19 @@ define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_1 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7] -; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,0,6,5,7,4,6] +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,8,9,0,1,6,7,2,3,14,15,18,19,22,23,26,27,24,25,16,17,22,23,18,19,30,31] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: ; AVX512VL: # %bb.0: @@ -5028,9 +5086,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2 ; ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15] -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: @@ -5051,9 +5110,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15] -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; XOPAVX2-NEXT: 
vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5122,10 +5182,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3] -; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: @@ -5147,10 +5206,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = 
ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5235,9 +5293,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_3 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,2,3,12,13,12,13,u,u,u,u,16,17,16,17,20,21,18,19,28,29,28,29,u,u,u,u] -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6],ymm2[7] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,7,u,4,7,u,u> +; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u] ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-FAST-NEXT: retq @@ -5300,9 +5358,9 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_2 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-FAST-NEXT: 
vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,0,1,u,u,2,3,u,u,24,25,u,u,26,27,u,u,16,17,u,u,18,19] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <2,0,4,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u] ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-FAST-NEXT: retq @@ -5354,19 +5412,18 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2 ; ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-FAST-NEXT: 
vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21,u,u,u,u] +; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-FAST-NEXT: retq ; @@ -5389,11 +5446,11 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2 ; ; XOPAVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] -; XOPAVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7] +; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5412,9 +5469,10 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1 ; ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm2 = 
ymm0[2,3,2,3] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15] -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: @@ -5436,9 +5494,10 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15] -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5458,9 +5517,8 @@ define <16 x i16> 
@shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6],ymm0[7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; AVX2-NEXT: retq ; @@ -5484,9 +5542,8 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1 ; XOPAVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6],ymm0[7] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -7278,11 +7335,10 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; AVX2-SLOW-LABEL: PR24935: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm1[8,9,8,9,8,9,8,9,0,1,14,15,12,13,0,1,24,25,24,25,24,25,24,25,16,17,30,31,28,29,16,17] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17] ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,4,5,4,5,4,5,22,23,20,21,16,17,26,27,20,21,20,21,20,21,20,21] -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> -; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-SLOW-NEXT: vpor %ymm2, %ymm1, %ymm1 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15] ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13] ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] @@ -7294,17 +7350,13 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; AVX2-FAST-LABEL: PR24935: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,8,9,8,9,8,9,0,1,14,15,12,13,0,1,24,25,24,25,24,25,24,25,16,17,30,31,28,29,16,17] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,4,5,4,5,4,5,22,23,20,21,16,17,26,27,20,21,20,21,20,21,20,21] -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> -; AVX2-FAST-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,2,3,2,3,u,u,10,11,u,u,u,u,u,u,u,u,18,19,18,19,u,u,26,27,u,u,u,u,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15] -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255] -; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,u,u,0,4,6,2> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[2,3],zero,zero,zero,zero,zero,zero,ymm0[6,7],zero,zero,ymm0[18,19,22,23],zero,zero,zero,zero,ymm0[26,27,28,29,16,17],zero,zero +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <5,6,3,0,0,6,4,u> +; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1],zero,zero,ymm1[6,7,0,1,10,11],zero,zero,ymm1[12,13],zero,zero,zero,zero,ymm1[16,17,20,21],zero,zero,zero,zero,zero,zero,ymm1[24,25] +; AVX2-FAST-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: PR24935: @@ -7330,11 +7382,10 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; XOPAVX2-LABEL: PR24935: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,8,9,8,9,8,9,0,1,14,15,12,13,0,1,24,25,24,25,24,25,24,25,16,17,30,31,28,29,16,17] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,4,5,4,5,4,5,22,23,20,21,16,17,26,27,20,21,20,21,20,21,20,21] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> -; XOPAVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = 
ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; XOPAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15] ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 82d1997cddfa9..a7e65f10a3604 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -800,21 +800,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: vpslldq {{.*#+}} ymm1 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16] -; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VLBW-NEXT: movl $-2147450880, %eax # imm = 0x80008000 -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -834,11 +827,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm1 -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -857,19 +847,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_ ; ; AVX2-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: movl $1, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -889,10 +874,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -911,17 +894,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,9,u,u,u,u,u,u,0,u,u,u,u,u,u,u> -; AVX512VLBW-NEXT: vpermw %ymm0, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -941,10 +921,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_ ; ; XOPAVX2-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -963,17 +941,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,9,u,u,u,u,u,u,0,u,u,u,u,u,u,u> -; AVX512VLBW-NEXT: vpermw %ymm0, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -993,10 +968,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1013,33 +986,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1058,9 +1015,8 @@ define <32 x i8> 
@shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1077,33 +1033,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = 
ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1122,9 +1062,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1141,33 +1080,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1186,9 +1109,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; 
XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1205,33 +1127,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1250,9 +1156,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1271,13 +1176,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; 
AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1298,7 +1203,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1318,13 +1223,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1345,7 +1250,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1365,13 +1270,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1392,7 +1297,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1412,13 +1317,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1439,7 +1344,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1459,13 +1364,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} 
ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1486,7 +1391,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1506,13 +1411,13 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1533,7 +1438,7 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1553,13 +1458,13 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1580,7 +1485,7 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; 
XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1600,16 +1505,14 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -1629,9 +1532,8 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; XOPAVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -2891,18 +2793,16 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_ ; ; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] -; AVX512VLBW-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] +; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: @@ -2922,10 +2822,9 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 
x i32> ret <32 x i8> %shuffle @@ -2943,18 +2842,16 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_ ; ; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] -; AVX512VLBW-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] +; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: @@ -2974,10 +2871,9 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_ ; ; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero +; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %shuffle = 
shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -3366,37 +3262,61 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0> -; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = 
zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; AVX2-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: retq ; -; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] -; AVX512VLBW-NEXT: movl $-222248896, %eax # imm = 0xF2C0C040 -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] -; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] -; AVX512VLBW-NEXT: movl $134948620, %eax # imm = 0x80B270C -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} -; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 -; AVX512VLBW-NEXT: retq +; AVX2-FAST-LABEL: 
shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero +; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; AVX2-FAST-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <3,4,5,7,5,4,1,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-FAST-NEXT: retq +; +; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX512VLBW-SLOW: # %bb.0: +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] +; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero +; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = 
zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] +; AVX512VLBW-SLOW-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-SLOW-NEXT: movl $134948620, %eax # imm = 0x80B270C +; AVX512VLBW-SLOW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-SLOW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} +; AVX512VLBW-SLOW-NEXT: retq +; +; AVX512VLBW-FAST-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX512VLBW-FAST: # %bb.0: +; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <3,4,5,7,5,4,1,u> +; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2 +; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero +; AVX512VLBW-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] +; AVX512VLBW-FAST-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-FAST-NEXT: movl $134948620, %eax # imm = 0x80B270C +; AVX512VLBW-FAST-NEXT: kmovd %eax, %k1 +; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u] +; AVX512VLBW-FAST-NEXT: retq + ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; AVX512VLVBMI: # %bb.0: @@ -3422,11 +3342,10 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; ; XOPAVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0> -; XOPAVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; XOPAVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] @@ -4415,11 +4334,8 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_ ; ; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: @@ -4438,11 +4354,8 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -4879,17 +4792,16 @@ define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-LABEL: PR28136: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[8,u,10,u,12,u,14,u,9,u,11,u,13,u,15,u] -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,2,u,4,u,6,u,1,u,3,u,5,u,7,u] -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,0,u,2,u,4,u,6,u,1,u,3,u,5,u,7] -; AVX1-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero,xmm1[9],zero,xmm1[11],zero,xmm1[13],zero,xmm1[15] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero,xmm0[9],zero,xmm0[11],zero,xmm0[13],zero,xmm0[15],zero +; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[1],zero,xmm1[3],zero,xmm1[5],zero,xmm1[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[1],zero,xmm0[3],zero,xmm0[5],zero,xmm0[7],zero +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -4921,15 +4833,12 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[8,u,10,u,12,u,14,u,9,u,11,u,13,u,15,u] -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; XOPAVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2 -; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,2,u,4,u,6,u,1,u,3,u,5,u,7,u] +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] +; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[8],xmm2[1],xmm0[10],xmm2[3],xmm0[12],xmm2[5],xmm0[14],xmm2[7],xmm0[9],xmm2[9],xmm0[11],xmm2[11],xmm0[13],xmm2[13],xmm0[15],xmm2[15] ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,0,u,2,u,4,u,6,u,1,u,3,u,5,u,7] -; XOPAVX1-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7],xmm0[1],xmm1[9],xmm0[3],xmm1[11],xmm0[5],xmm1[13],xmm0[7],xmm1[15] ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; @@ -4958,22 +4867,14 @@ define <32 x i8> @PR47262(<4 x i64> %a0) { ; ; AVX2-LABEL: PR47262: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u] -; 
AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: PR47262: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u] -; AVX512VLBW-NEXT: movw $21930, %ax # imm = 0x55AA -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: PR47262: @@ -4992,11 +4893,8 @@ define <32 x i8> @PR47262(<4 x i64> %a0) { ; ; XOPAVX2-LABEL: PR47262: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] ; XOPAVX2-NEXT: retq %t1 = shufflevector <4 x 
i64> %a0, <4 x i64> undef, <4 x i32> %t2 = bitcast <4 x i64> %t1 to <32 x i8> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index e9f4aa99f148b..7d3e8f66ed394 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -378,11 +378,10 @@ define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) { define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) { ; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10: ; ALL: # %bb.0: -; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; ALL-NEXT: vmovsldup {{.*#+}} xmm1 = xmm1[0,0,2,2] -; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 -; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,u] -; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7] +; ALL-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,6,7,10,0,1,2,3,4,6,7,10] +; ALL-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] +; ALL-NEXT: vpermd %zmm0, %zmm1, %zmm0 +; ALL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; ALL-NEXT: retq %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> ret <8 x float> %res @@ -764,3 +763,47 @@ define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_ %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> ret <16 x float> %res } + +%struct.foo = type { [4 x double], [3 x [4 x double]], [4 x double] } + +; This test previously hung in shuffle combining. 
https://github.com/ispc/ispc/issues/1864 +define void @ispc_1864(<16 x float>* %arg) { +; ALL-LABEL: ispc_1864: +; ALL: # %bb.0: # %bb +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .cfi_def_cfa_offset 16 +; ALL-NEXT: .cfi_offset %rbp, -16 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .cfi_def_cfa_register %rbp +; ALL-NEXT: andq $-64, %rsp +; ALL-NEXT: subq $4864, %rsp # imm = 0x1300 +; ALL-NEXT: vbroadcastss {{.*#+}} ymm0 = [-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0] +; ALL-NEXT: vmulps 32(%rdi), %ymm0, %ymm0 +; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 +; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,0,1,0,1] +; ALL-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp) +; ALL-NEXT: movq %rbp, %rsp +; ALL-NEXT: popq %rbp +; ALL-NEXT: .cfi_def_cfa %rsp, 8 +; ALL-NEXT: vzeroupper +; ALL-NEXT: retq +bb: + %tmp = alloca [30 x %struct.foo], align 64 + %tmp1 = load <16 x float>, <16 x float>* %arg, align 4 + %tmp2 = fmul <16 x float> %tmp1, + %tmp3 = fpext <16 x float> %tmp2 to <16 x double> + %tmp4 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 0 + %tmp5 = extractelement <16 x double> %tmp3, i32 10 + store double %tmp5, double* %tmp4, align 32 + %tmp6 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 1 + %tmp7 = extractelement <16 x double> %tmp3, i32 11 + store double %tmp7, double* %tmp6, align 8 + %tmp8 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 2 + %tmp9 = extractelement <16 x double> %tmp3, i32 12 + store double %tmp9, double* %tmp8, align 16 + %tmp10 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 3 + %tmp11 = extractelement <16 x double> %tmp3, i32 13 + store double %tmp11, double* %tmp10, align 8 + ret void +} + diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index 4c8073614d6dd..ac6701b383f25 100644 --- 
a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -66,18 +66,16 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1 ; KNL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38: ; KNL: ## %bb.0: ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; KNL-NEXT: vpermq {{.*#+}} ymm3 = ymm2[2,3,0,1] -; KNL-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5],ymm2[6],ymm3[7],ymm2[8,9,10,11],ymm3[12,13],ymm2[14],ymm3[15] -; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u] -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm4 -; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5,6,7],ymm0[8,9,10,11,12],ymm4[13,14,15] -; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u] +; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3] +; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u] +; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3] +; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,u,u,4,5,u,u,2,3,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u] ; KNL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7],ymm0[8],ymm3[9],ymm0[10],ymm3[11],ymm0[12],ymm3[13],ymm0[14],ymm3[15] ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm1 ; KNL-NEXT: vpbroadcastw %xmm1, %ymm1 ; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5,6],ymm1[7],ymm3[8,9,10,11,12,13,14],ymm1[15] ; KNL-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] -; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17] +; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17] ; KNL-NEXT: vpblendw {{.*#+}} ymm0 = 
ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; KNL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll index cb2dd3ef7e86d..ccf1476e6a657 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -35,11 +35,11 @@ define <8 x float> @expand1(<4 x float> %a ) { ; ; KNL-LABEL: expand1: ; KNL: # %bb.0: -; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; KNL-NEXT: vmovaps {{.*#+}} ymm1 = -; KNL-NEXT: vpermps %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] +; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL-NEXT: vmovaps {{.*#+}} ymm1 = [16,0,18,1,20,2,22,3] +; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL-NEXT: ret{{[l|q]}} %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> ret <8 x float> %res @@ -268,10 +268,11 @@ define <8 x float> @expand14(<4 x float> %a) { ; ; KNL-LABEL: expand14: ; KNL: # %bb.0: -; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; KNL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3] -; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7] +; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL-NEXT: vmovaps {{.*#+}} ymm1 = [16,17,0,19,1,21,22,23] +; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL-NEXT: ret{{[l|q]}} %addV = fadd <4 x float> , %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> @@ -476,9 +477,11 @@ define <8 x float> @test_masked_permps_v8f32(<8 x 
float>* %vp, <8 x float> %vec2 ; ; KNL64-LABEL: test_masked_permps_v8f32: ; KNL64: # %bb.0: -; KNL64-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7] -; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3] -; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7] +; KNL64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; KNL64-NEXT: vmovaps (%rdi), %ymm1 +; KNL64-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] +; KNL64-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 +; KNL64-NEXT: vmovaps %ymm1, %ymm0 ; KNL64-NEXT: retq ; ; SKX32-LABEL: test_masked_permps_v8f32: @@ -492,10 +495,12 @@ define <8 x float> @test_masked_permps_v8f32(<8 x float>* %vp, <8 x float> %vec2 ; ; KNL32-LABEL: test_masked_permps_v8f32: ; KNL32: # %bb.0: +; KNL32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL32-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7] -; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3] -; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7] +; KNL32-NEXT: vmovaps (%eax), %ymm1 +; KNL32-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] +; KNL32-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 +; KNL32-NEXT: vmovaps %ymm1, %ymm0 ; KNL32-NEXT: retl %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 6ffbe095c39ba..47c1e67e096a0 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -132,21 +132,40 @@ define <8 x float> @combine_vpermilvar_vperm2f128_8f32(<8 x float> %a0) { } define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) { -; CHECK-LABEL: combine_vpermilvar_vperm2f128_zero_8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] -; CHECK-NEXT: ret{{[l|q]}} +; AVX-LABEL: 
combine_vpermilvar_vperm2f128_zero_8f32: +; AVX: # %bb.0: +; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] +; AVX-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: combine_vpermilvar_vperm2f128_zero_8f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [16,17,18,19,3,2,1,0] +; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; AVX512-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] +; AVX512-NEXT: ret{{[l|q]}} %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> ) %2 = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <8 x i32> %3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> ) ret <8 x float> %3 } -define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) { -; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd: +define <4 x double> @combine_vperm2f128_vpermilvar_as_vperm2f128(<4 x double> %a0) { +; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vperm2f128: +; CHECK: # %bb.0: +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] +; CHECK-NEXT: ret{{[l|q]}} + %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) + %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> + %3 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %2, <4 x i64> ) + ret <4 x double> %3 +} + +define <4 x double> @combine_vperm2f128_vpermilvar_as_vmovaps(<4 x double> %a0) { +; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vmovaps: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] +; CHECK-NEXT: vmovaps %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> diff --git 
a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 785202c88b6da..e7287162dfcb8 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -807,8 +807,8 @@ entry: define <32 x i8> @PR27320(<8 x i32> %a0) { ; CHECK-LABEL: PR27320: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1] -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,1,2,3,4,4,5,6,7,7,8,9,10,10,11,28,29,29,30,31,16,16,17,18,19,19,20,21,22,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,1,2,3,4,4,5,6,7,7,8,9,10,10,11,20,21,21,22,23,24,24,25,26,27,27,28,29,30,30,31] ; CHECK-NEXT: ret{{[l|q]}} %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %2 = bitcast <8 x i32> %1 to <32 x i8> @@ -817,15 +817,26 @@ define <32 x i8> @PR27320(<8 x i32> %a0) { } define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1, <8 x float> %inp2) { -; CHECK-LABEL: PR34577: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1] -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = -; CHECK-NEXT: vpermps %ymm1, %ymm2, %ymm1 -; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] -; CHECK-NEXT: ret{{[l|q]}} +; AVX2-LABEL: PR34577: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1] +; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = +; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] +; AVX2-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: PR34577: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512-NEXT: vpermpd {{.*#+}} 
ymm0 = ymm0[1,1,1,1] +; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7] +; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = <23,18,7,2,20,u,3,2> +; AVX512-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: ret{{[l|q]}} entry: %shuf0 = shufflevector <8 x float> %inp0, <8 x float> %inp2, <8 x i32> %sel = select <8 x i1> , <8 x float> %shuf0, <8 x float> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 6420a62ff0baf..8bdd2451434e6 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -3248,57 +3248,21 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) { ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: PR45604: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa (%rsi), %xmm0 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,0,0,0,0,0,0,0,0] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm1[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,0,2,1,4,4,6,5] -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11] -; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm2, %ymm4, %ymm2 -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm5 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,1,3,4,5,5,7] -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm7 = ymm6[2,0,3,1,4,5,6,7,10,8,11,9,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm7 = ymm7[0,1,1,3,4,5,5,7] -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm8 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u> -; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm5, %ymm7, %ymm5 -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = 
ymm1[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] -; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1 -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,3,3,6,5,7,7] -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm3 = ymm6[0,1,2,3,6,4,7,5,8,9,10,11,14,12,15,13] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[2,1,3,3,6,5,7,7] -; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm0, %ymm3, %ymm0 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0],ymm2[1],ymm5[2],ymm2[3],ymm5[4],ymm2[5],ymm5[6],ymm2[7] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX2-SLOW-NEXT: vmovdqu %ymm0, 32(%rdi) -; AVX2-SLOW-NEXT: vmovdqu %ymm2, (%rdi) -; AVX2-SLOW-NEXT: vzeroupper -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: PR45604: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa (%rsi), %xmm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,4,5,u,u,u,u,2,3,6,7,u,u,u,u,16,17,20,21,u,u,u,u,18,19,22,23,u,u,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[4,5,0,1,u,u,u,u,6,7,2,3,u,u,u,u,20,21,16,17,u,u,u,u,22,23,18,19,u,u,u,u] -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11] -; AVX2-FAST-NEXT: vpblendvb %ymm4, {{.*}}(%rip), %ymm5, %ymm4 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u> -; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm1, %ymm3, %ymm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,u,u,u,u,10,11,14,15,u,u,u,u,24,25,28,29,u,u,u,u,26,27,30,31,u,u,u,u] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[12,13,8,9,u,u,u,u,14,15,10,11,u,u,u,u,28,29,24,25,u,u,u,u,30,31,26,27,u,u,u,u] -; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm0 -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 
= ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7] -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7] -; AVX2-FAST-NEXT: vmovdqu %ymm0, 32(%rdi) -; AVX2-FAST-NEXT: vmovdqu %ymm1, (%rdi) -; AVX2-FAST-NEXT: vzeroupper -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: PR45604: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqa (%rsi), %xmm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,0,2] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u> +; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm3[1],ymm1[2],ymm3[3],ymm1[4],ymm3[5],ymm1[6],ymm3[7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3] +; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7] +; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi) +; AVX2-NEXT: vmovdqu %ymm1, (%rdi) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq %v1 = load <8 x i16>, <8 x i16>* %src, align 16 %v2 = shufflevector <8 x i16> %v1, <8 x i16> zeroinitializer, <16 x i32> %v3 = shufflevector <16 x i16> %v2, <16 x i16> , <32 x i32> diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll index 3bbf4f20d60af..d6849a94ddb91 100644 --- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -285,9 +285,9 @@ define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE2: # %bb.0: ; SSE2-NEXT: psrlw $1, %xmm0 ; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: psubusw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtw %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: uge_v8i16: @@ -316,9 +316,9 @@ define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE2: # %bb.0: ; SSE2-NEXT: psrlw $1, 
%xmm0 ; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: psubusw %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ule_v8i16: @@ -451,3 +451,86 @@ define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) { ret <16 x i1> %cmp } +define <8 x i16> @PR47448_uge(i16 signext %0) { +; SSE2-LABEL: PR47448_uge: +; SSE2: # %bb.0: +; SSE2-NEXT: andl $7, %edi +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] +; SSE2-NEXT: pcmpgtw %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: PR47448_uge: +; SSE41: # %bb.0: +; SSE41-NEXT: andl $7, %edi +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7] +; SSE41-NEXT: pmaxuw %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: PR47448_uge: +; AVX1: # %bb.0: +; AVX1-NEXT: andl $7, %edi +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR47448_uge: +; AVX2: # %bb.0: +; AVX2-NEXT: andl $7, %edi +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq + %2 = and i16 %0, 7 + %3 = insertelement <8 x i16> undef, i16 %2, i32 0 + %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer + %5 = icmp uge <8 x i16> %4, + %6 = sext <8 x i1> %5 to 
<8 x i16> + ret <8 x i16> %6 +} + +define <8 x i16> @PR47448_ugt(i16 signext %0) { +; SSE-LABEL: PR47448_ugt: +; SSE: # %bb.0: +; SSE-NEXT: andl $7, %edi +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: PR47448_ugt: +; AVX1: # %bb.0: +; AVX1-NEXT: andl $7, %edi +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR47448_ugt: +; AVX2: # %bb.0: +; AVX2-NEXT: andl $7, %edi +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: retq + %2 = and i16 %0, 7 + %3 = insertelement <8 x i16> undef, i16 %2, i32 0 + %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer + %5 = icmp ugt <8 x i16> %4, + %6 = sext <8 x i1> %5 to <8 x i16> + ret <8 x i16> %6 +} diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index d97b9a359b1ae..b43510f7fd194 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -1905,11 +1905,20 @@ define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX2-NEXT: retq ; -; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] -; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512-NEXT: retq +; AVX512F-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] +; 
AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,33,34,35,3,37,38,39,4,41,42,43,5,45,46,47] +; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512BW-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <4 x i64> diff --git a/llvm/test/DebugInfo/Generic/pr40628.ll b/llvm/test/DebugInfo/Generic/pr40628.ll index 69f09f7eb5bf9..af994de725066 100644 --- a/llvm/test/DebugInfo/Generic/pr40628.ll +++ b/llvm/test/DebugInfo/Generic/pr40628.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S %s -o - | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S %s -o - | FileCheck %s ; PR40628: The first load below is determined to be redundant by EarlyCSE. ; During salvaging, the corresponding dbg.value could have a DW_OP_deref used diff --git a/llvm/test/DebugInfo/X86/struct-fwd-decl.ll b/llvm/test/DebugInfo/X86/struct-fwd-decl.ll new file mode 100644 index 0000000000000..adee78d1ced11 --- /dev/null +++ b/llvm/test/DebugInfo/X86/struct-fwd-decl.ll @@ -0,0 +1,21 @@ +; RUN: llc -O0 -mtriple=x86_64-unknown-linux %s -o %t -filetype=obj +; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s +; Test that size is not emitted for class declarations in DWARF, even if it exists. 
+ +@s = global i16 0, align 2, !dbg !0 + +!llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!7} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "s", scope: null, file: !2, line: 2, type: !3, isLocal: false, isDefinition: true) +!2 = !DIFile(filename: "foo.cpp", directory: "/tmp") +!3 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !2, line: 1, size: 16, align: 16, flags: DIFlagFwdDecl) +!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !5, retainedTypes: !5, globals: !6, imports: !5) +!5 = !{} +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name +; CHECK-NOT: DW_AT_byte_size +; CHECK: {{NULL|DW_TAG}} +!6 = !{!0} +!7 = !{i32 1, !"Debug Info Version", i32 3} diff --git a/llvm/test/DebugInfo/salvage-cast-debug-info.ll b/llvm/test/DebugInfo/salvage-cast-debug-info.ll index f39ac46a66a4b..c4f2111bddf22 100644 --- a/llvm/test/DebugInfo/salvage-cast-debug-info.ll +++ b/llvm/test/DebugInfo/salvage-cast-debug-info.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -debugify -early-cse -S | FileCheck %s +; RUN: opt %s -debugify -early-cse -earlycse-debug-hash -S | FileCheck %s define i32 @foo(i64 %nose, i32 %more) { ; CHECK-LABEL: @foo( ; CHECK: call void @llvm.dbg.value(metadata i64 %nose, metadata [[V1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned diff --git a/llvm/test/Feature/OperandBundles/early-cse.ll b/llvm/test/Feature/OperandBundles/early-cse.ll index fc201479d8ce8..cf06cd1e1f1db 100644 --- a/llvm/test/Feature/OperandBundles/early-cse.ll +++ b/llvm/test/Feature/OperandBundles/early-cse.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; While it is normally okay to do memory optimizations over calls to ; @readonly_function and @readnone_function, we cannot do that if diff --git 
a/llvm/test/Feature/OperandBundles/special-state.ll b/llvm/test/Feature/OperandBundles/special-state.ll index 56e337cc16b37..f51becaa50984 100644 --- a/llvm/test/Feature/OperandBundles/special-state.ll +++ b/llvm/test/Feature/OperandBundles/special-state.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; This test isn't directly related to EarlyCSE or varargs. It is just ; using these as a vehicle for testing the correctness of diff --git a/llvm/test/MC/AArch64/arm64_32-compact-unwind.s b/llvm/test/MC/AArch64/arm64_32-compact-unwind.s index 59d882ae3a5c0..d221640a07478 100644 --- a/llvm/test/MC/AArch64/arm64_32-compact-unwind.s +++ b/llvm/test/MC/AArch64/arm64_32-compact-unwind.s @@ -4,7 +4,7 @@ ; The compact unwind format in ILP32 mode is pretty much the same, except ; references to addresses (function, personality, LSDA) are pointer-sized. -; CHECK: Contents of section __compact_unwind: +; CHECK: Contents of section __LD,__compact_unwind: ; CHECK-NEXT: 0004 00000000 04000000 00000002 00000000 ; CHECK-NEXT: 0014 00000000 .globl _test_compact_unwind diff --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s index 633eeb50d8dd7..f7faa64b9309a 100644 --- a/llvm/test/MC/AArch64/seh.s +++ b/llvm/test/MC/AArch64/seh.s @@ -20,7 +20,7 @@ // CHECK-NEXT: } // CHECK: Section { // CHECK: Name: .xdata -// CHECK: RawDataSize: 48 +// CHECK: RawDataSize: 56 // CHECK: RelocationCount: 1 // CHECK: Characteristics [ // CHECK-NEXT: ALIGN_4BYTES @@ -41,7 +41,7 @@ // CHECK-NEXT: Relocations [ // CHECK-NEXT: Section (4) .xdata { -// CHECK-NEXT: 0x24 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler +// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB __C_specific_handler // CHECK-NEXT: } // CHECK-NEXT: Section (5) .pdata { // CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func @@ -54,8 +54,12 @@ // CHECK-NEXT: Function: func // CHECK-NEXT: ExceptionRecord: .xdata // CHECK-NEXT: ExceptionData { -// CHECK-NEXT: 
FunctionLength: 72 +// CHECK-NEXT: FunctionLength: 100 // CHECK: Prologue [ +// CHECK-NEXT: 0xec ; clear unwound to call +// CHECK-NEXT: 0xea ; context +// CHECK-NEXT: 0xe9 ; machine frame +// CHECK-NEXT: 0xe8 ; trap frame // CHECK-NEXT: 0xe3 ; nop // CHECK-NEXT: 0xe202 ; add fp, sp, #16 // CHECK-NEXT: 0xdd41 ; str d13, [sp, #8] @@ -66,7 +70,10 @@ // CHECK-NEXT: 0x46 ; stp x29, x30, [sp, #48] // CHECK-NEXT: 0xd141 ; str x24, [sp, #8] // CHECK-NEXT: 0xd483 ; str x23, [sp, #-32]! +// CHECK-NEXT: 0xe6 ; save next // CHECK-NEXT: 0xc882 ; stp x21, x22, [sp, #16] +// CHECK-NEXT: 0xd6c2 ; stp x25, lr, [sp, #16] +// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! // CHECK-NEXT: 0xcc03 ; stp x19, x20, [sp, #-32]! // CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! // CHECK-NEXT: 0xe1 ; mov fp, sp @@ -75,8 +82,8 @@ // CHECK-NEXT: ] // CHECK-NEXT: EpilogueScopes [ // CHECK-NEXT: EpilogueScope { -// CHECK-NEXT: StartOffset: 16 -// CHECK-NEXT: EpilogueStartIndex: 25 +// CHECK-NEXT: StartOffset: 23 +// CHECK-NEXT: EpilogueStartIndex: 33 // CHECK-NEXT: Opcodes [ // CHECK-NEXT: 0x01 ; add sp, #16 // CHECK-NEXT: 0xe4 ; end @@ -108,8 +115,14 @@ func: .seh_save_fplr_x 32 stp x19, x20, [sp, #-32]! .seh_save_regp_x x19, 32 + stp x19, x20, [sp, #-32]! + .seh_save_r19r20_x 32 + stp x25, x30, [sp, #16] + .seh_save_lrpair x25, 16 stp x21, x22, [sp, #16] .seh_save_regp x21, 16 + stp x23, x24, [sp, #32] + .seh_save_next str x23, [sp, #-32]! 
.seh_save_reg_x x23, 32 str x24, [sp, #8] @@ -130,6 +143,14 @@ func: .seh_add_fp 16 nop .seh_nop + nop + .seh_trap_frame + nop + .seh_pushframe + nop + .seh_context + nop + .seh_clear_unwound_to_call .seh_endprologue nop .seh_startepilogue diff --git a/llvm/test/MC/AMDGPU/add-sub-no-carry.s b/llvm/test/MC/AMDGPU/add-sub-no-carry.s index 8398199a89568..884d1dd850722 100644 --- a/llvm/test/MC/AMDGPU/add-sub-no-carry.s +++ b/llvm/test/MC/AMDGPU/add-sub-no-carry.s @@ -1,7 +1,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI --implicit-check-not=error: %s // FIXME: pre-gfx9 errors should be more useful diff --git a/llvm/test/MC/AMDGPU/atomic-fadd-insts.s b/llvm/test/MC/AMDGPU/atomic-fadd-insts.s index a0a516e4d772b..70014c6fafc46 100644 --- a/llvm/test/MC/AMDGPU/atomic-fadd-insts.s +++ b/llvm/test/MC/AMDGPU/atomic-fadd-insts.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=GFX908 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX908-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefix=GFX908-ERR --implicit-check-not=error: %s buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 // GFX908: encoding: [0xff,0x0f,0x34,0xe1,0x00,0x05,0x02,0x03] diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s index 196dcada2ebea..86efb1883339b 100644 --- a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s +++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s 
@@ -1,7 +1,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR -check-prefix=GCN-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s index 15cfb225b8b55..f8e6407c0548e 100644 --- a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s +++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/dl-insts-err.s b/llvm/test/MC/AMDGPU/dl-insts-err.s index 3f6d4fd861102..efdf079d8b889 100644 --- a/llvm/test/MC/AMDGPU/dl-insts-err.s +++ b/llvm/test/MC/AMDGPU/dl-insts-err.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx800 -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck 
%s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx800 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 // // Test unsupported GPUs. @@ -44,17 +44,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[] v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. 
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0,0,0,0] @@ -72,17 +72,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[] v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0] @@ -100,17 +100,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[] v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. 
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0,0,0] @@ -128,17 +128,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[] v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. 
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,0,0,0,0] @@ -156,17 +156,17 @@ v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[] v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0,0,0,0] @@ -184,17 +184,17 @@ v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[] v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. 
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0] @@ -216,17 +216,17 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[] v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. 
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0,0,0,0] @@ -246,15 +246,15 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[,] v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[,0] // GFX906-GFX908: error: invalid op_sel_hi value v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. 
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0] diff --git a/llvm/test/MC/AMDGPU/dpp-err.s b/llvm/test/MC/AMDGPU/dpp-err.s index a3ab0f38abf7b..19d896d82d592 100644 --- a/llvm/test/MC/AMDGPU/dpp-err.s +++ b/llvm/test/MC/AMDGPU/dpp-err.s @@ -1,38 +1,38 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GFX89 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX89 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GFX89-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX89-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s v_mov_b32_dpp v0, v1 row_share:1 row_mask:0x1 bank_mask:0x1 -// GFX89-ERR: not a valid operand. +// GFX89-ERR: error: not a valid operand. 
// GFX10: v_mov_b32_dpp v0, v1 row_share:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x51,0x01,0x11] v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 -// GFX89-ERR: not a valid operand. +// GFX89-ERR: error: not a valid operand. // GFX10: v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x61,0x01,0x11] v_mov_b32_dpp v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x30,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x38,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x34,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x3c,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x42,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x43,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/ds-err.s b/llvm/test/MC/AMDGPU/ds-err.s index 7ed4080246a03..507bcbc1c4da9 100644 --- a/llvm/test/MC/AMDGPU/ds-err.s +++ b/llvm/test/MC/AMDGPU/ds-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --implicit-check-not=error: %s // offset too big // CHECK: error: invalid operand for instruction @@ -18,19 +18,19 @@ ds_write2_b32 v2, v4, v6 offset0:4 offset0:8 ds_write2_b32 v2, v4, v6 offset1:4 offset1:8 // offset0 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset0:1000000000 // offset0 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset0:0x100 // offset1 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset1:1000000000 // offset1 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset1:0x100 //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ ds_write2_b32 v2, v4, v6 offset1:0x100 // CHECK: error: expected a colon ds_swizzle_b32 v8, v2 offset -// CHECK: error: failed parsing operand +// CHECK: error: unknown token in expression ds_swizzle_b32 v8, v2 offset: // CHECK: error: expected a colon @@ -121,5 +121,5 @@ ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "ppii") // CHECK: error: expected a 5-character mask ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppiii") -// CHECK: invalid mask +// CHECK: error: invalid mask ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppi2") diff --git a/llvm/test/MC/AMDGPU/ds-gfx9.s 
b/llvm/test/MC/AMDGPU/ds-gfx9.s index 810ccb018e855..2ed2f953b0ca8 100644 --- a/llvm/test/MC/AMDGPU/ds-gfx9.s +++ b/llvm/test/MC/AMDGPU/ds-gfx9.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s ds_read_u8_d16 v8, v2 // GFX9: ds_read_u8_d16 v8, v2 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x00,0x00,0x08] diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index 70f52972a81c8..25c3cdd38830b 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -3,9 +3,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI // RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOCI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Checks for 16-bit Offsets @@ -16,11 +16,11 @@ ds_add_u32 v2, v4 offset:16 // VI: ds_add_u32 v2, 
v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00] ds_add_src2_f32 v255 offset:65535 -// NOSICI: error +// NOSICI: error: not a valid operand. // VI: ds_add_src2_f32 v255 offset:65535 ; encoding: [0xff,0xff,0x2a,0xd9,0xff,0x00,0x00,0x00] ds_add_src2_f32 v0 offset:4 gds -// NOSICI: error +// NOSICI: error: not a valid operand. // VI: ds_add_src2_f32 v0 offset:4 gds ; encoding: [0x04,0x00,0x2b,0xd9,0x00,0x00,0x00,0x00] //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/exp-err.s b/llvm/test/MC/AMDGPU/exp-err.s index 22d3edf0e0318..b3494a11fa08b 100644 --- a/llvm/test/MC/AMDGPU/exp-err.s +++ b/llvm/test/MC/AMDGPU/exp-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s exp mrt8 v3, v2, v1, v0 // GCN: :5: error: invalid exp target diff --git a/llvm/test/MC/AMDGPU/exp-gfx10.s b/llvm/test/MC/AMDGPU/exp-gfx10.s index e207c5f0ede3c..2a02cef542ee3 100644 --- a/llvm/test/MC/AMDGPU/exp-gfx10.s +++ b/llvm/test/MC/AMDGPU/exp-gfx10.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=verde -show-encoding %s 2>&1 | FileCheck -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=verde %s 2>&1 | FileCheck -check-prefix=SI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=VI --implicit-check-not=error: %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s exp prim v1, off, off, off diff --git 
a/llvm/test/MC/AMDGPU/expressions-gfx10.s b/llvm/test/MC/AMDGPU/expressions-gfx10.s index b3f051b819b7f..8c413879a3c02 100644 --- a/llvm/test/MC/AMDGPU/expressions-gfx10.s +++ b/llvm/test/MC/AMDGPU/expressions-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck %s --check-prefix=GFX10 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck -check-prefix=NOGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck -check-prefix=NOGFX10 --implicit-check-not=error: %s i1=1 diff --git a/llvm/test/MC/AMDGPU/expressions-gfx9.s b/llvm/test/MC/AMDGPU/expressions-gfx9.s index a52887596af60..5419c8ed5cb9f 100644 --- a/llvm/test/MC/AMDGPU/expressions-gfx9.s +++ b/llvm/test/MC/AMDGPU/expressions-gfx9.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Relocatable expressions cannot be used with SDWA modifiers. 
diff --git a/llvm/test/MC/AMDGPU/expressions.s b/llvm/test/MC/AMDGPU/expressions.s index 37fe08a52d1ba..57f47d8f0345d 100644 --- a/llvm/test/MC/AMDGPU/expressions.s +++ b/llvm/test/MC/AMDGPU/expressions.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Floating-point expressions are not supported @@ -52,10 +52,10 @@ v_mad_f16 v5, v1, v2, |hm1| // Only primary expressions are allowed v_ceil_f32 v1, |1+i1| -// NOVI: failed parsing operand +// NOVI: error: expected vertical bar v_ceil_f32 v1, |i1+1| -// NOVI: failed parsing operand +// NOVI: error: expected vertical bar //===----------------------------------------------------------------------===// // Constant expressions may be used with 'abs' and 'neg' modifiers. @@ -327,8 +327,8 @@ v_sin_f32 v0, -[ttmp0] s1000=1 v_sin_f32 v0, -s1000 -// NOVI: failed parsing operand +// NOVI: error: not a valid operand. xnack_mask_lo=1 v_sin_f32 v0, xnack_mask_lo -// NOVI: failed parsing operand +// NOVI: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/flat-gfx10.s b/llvm/test/MC/AMDGPU/flat-gfx10.s index bf728d1618be9..90229630cfe72 100644 --- a/llvm/test/MC/AMDGPU/flat-gfx10.s +++ b/llvm/test/MC/AMDGPU/flat-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s flat_load_dword v1, v[3:4] // GFX10: encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x7d,0x01] diff --git a/llvm/test/MC/AMDGPU/flat-gfx9.s b/llvm/test/MC/AMDGPU/flat-gfx9.s index bb6839a9b13f9..f0aff08fe6db0 100644 --- a/llvm/test/MC/AMDGPU/flat-gfx9.s +++ b/llvm/test/MC/AMDGPU/flat-gfx9.s @@ -1,8 +1,8 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s flat_load_dword v1, v[3:4] offset:0 diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s index b771073407fe7..7a1d3333fb73d 100644 --- a/llvm/test/MC/AMDGPU/flat-global.s +++ b/llvm/test/MC/AMDGPU/flat-global.s @@ -1,14 +1,14 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 
-check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR --implicit-check-not=error: %s global_load_ubyte v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x20,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte v1, v[3:4], off ; encoding: [0x00,0x80,0x40,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_ubyte v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x20,0xdc,0x03,0x00,0x7d,0x01] @@ -18,7 +18,7 @@ global_load_ubyte v1, v[3:4], off dlc global_load_sbyte v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x24,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte v1, v[3:4], off ; encoding: [0x00,0x80,0x44,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x24,0xdc,0x03,0x00,0x7d,0x01] @@ -28,7 +28,7 @@ global_load_sbyte v1, v[3:4], off dlc global_load_ushort v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x28,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ushort v1, v[3:4], off ; encoding: 
[0x00,0x80,0x48,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_ushort v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x28,0xdc,0x03,0x00,0x7d,0x01] @@ -38,7 +38,7 @@ global_load_ushort v1, v[3:4], off dlc global_load_sshort v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x2c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sshort v1, v[3:4], off ; encoding: [0x00,0x80,0x4c,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sshort v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x2c,0xdc,0x03,0x00,0x7d,0x01] @@ -48,7 +48,7 @@ global_load_sshort v1, v[3:4], off dlc global_load_dword v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x30,0xdc,0x03,0x00,0x7d,0x01] @@ -58,7 +58,7 @@ global_load_dword v1, v[3:4], off dlc global_load_dwordx2 v[1:2], v[3:4], off // GFX10: encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx2 v[1:2], v[3:4], off ; encoding: [0x00,0x80,0x54,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx2 v[1:2], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x34,0xdc,0x03,0x00,0x7d,0x01] @@ -68,7 +68,7 @@ global_load_dwordx2 v[1:2], v[3:4], off dlc global_load_dwordx3 v[1:3], v[3:4], off // GFX10: encoding: [0x00,0x80,0x3c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx3 v[1:3], v[3:4], off ; encoding: [0x00,0x80,0x58,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not 
supported on this GPU global_load_dwordx3 v[1:3], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x3c,0xdc,0x03,0x00,0x7d,0x01] @@ -78,7 +78,7 @@ global_load_dwordx3 v[1:3], v[3:4], off dlc global_load_dwordx4 v[1:4], v[3:4], off // GFX10: encoding: [0x00,0x80,0x38,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx4 v[1:4], v[3:4], off ; encoding: [0x00,0x80,0x5c,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx4 v[1:4], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x38,0xdc,0x03,0x00,0x7d,0x01] @@ -119,7 +119,7 @@ global_load_dword v1, v[3:4] off, offset:-4097 global_store_byte v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_byte v[3:4], v1, off ; encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_byte v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x60,0xdc,0x03,0x01,0x7d,0x00] @@ -129,7 +129,7 @@ global_store_byte v[3:4], v1, off dlc global_store_short v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_short v[3:4], v1, off ; encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_short v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x68,0xdc,0x03,0x01,0x7d,0x00] @@ -139,7 +139,7 @@ global_store_short v[3:4], v1, off dlc global_store_dword v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dword v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x70,0xdc,0x03,0x01,0x7d,0x00] 
@@ -149,7 +149,7 @@ global_store_dword v[3:4], v1, off dlc global_store_dwordx2 v[3:4], v[1:2], off // GFX10: encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx2 v[3:4], v[1:2], off ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx2 v[3:4], v[1:2], off dlc // GFX10: encoding: [0x00,0x90,0x74,0xdc,0x03,0x01,0x7d,0x00] @@ -159,7 +159,7 @@ global_store_dwordx2 v[3:4], v[1:2], off dlc global_store_dwordx3 v[3:4], v[1:3], off // GFX10: encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx3 v[3:4], v[1:3], off ; encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx3 v[3:4], v[1:3], off dlc // GFX10: encoding: [0x00,0x90,0x7c,0xdc,0x03,0x01,0x7d,0x00] @@ -169,7 +169,7 @@ global_store_dwordx3 v[3:4], v[1:3], off dlc global_store_dwordx4 v[3:4], v[1:4], off // GFX10: encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx4 v[3:4], v[1:4], off ; encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx4 v[3:4], v[1:4], off dlc // GFX10: encoding: [0x00,0x90,0x78,0xdc,0x03,0x01,0x7d,0x00] @@ -179,32 +179,32 @@ global_store_dwordx4 v[3:4], v[1:4], off dlc global_store_dword v[3:4], v1, off offset:12 // GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: [[@LINE-3]]:36: error: not a valid operand +// VI-ERR: :36: error: not a valid operand global_load_dword v1, v3, s[2:3] // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] ; encoding: 
[0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: [[@LINE-3]]:1: error: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v3, s[2:3] offset:24 // GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: [[@LINE-3]]:34: error: not a valid operand. +// VI-ERR: :34: error: not a valid operand. global_load_dword v1, v3, s[2:3] offset:-8 // GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: [[@LINE-3]]:34: error: not a valid operand. +// VI-ERR: :34: error: not a valid operand. global_store_dword v3, v1, s[2:3] // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v3, v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dword v3, v1, s[2:3] offset:24 // GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v3, v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] -// VI-ERR: [[@LINE-3]]:35: error: not a valid operand. +// VI-ERR: :35: error: not a valid operand. 
global_store_dword v3, v1, s[2:3] offset:-8 // GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00] @@ -215,7 +215,7 @@ global_store_dword v3, v1, s[2:3] offset:-8 global_store_dword v3, v1, exec // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00] // GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v[3:4], s2 // GFX10-ERR: error: invalid operand for instruction @@ -250,107 +250,107 @@ global_atomic_swap_x2 v[3:4], v[5:6], off global_atomic_add v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xc8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add v[3:4], v5, off ; encoding: [0x00,0x80,0x08,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_sub v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xcc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub v[3:4], v5, off ; encoding: [0x00,0x80,0x0c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smin v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin v[3:4], v5, off ; encoding: [0x00,0x80,0x10,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umin v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xd8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin v[3:4], v5, off ; encoding: [0x00,0x80,0x14,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smax v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xdc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax v[3:4], v5, off ; encoding: 
[0x00,0x80,0x18,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umax v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xe0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax v[3:4], v5, off ; encoding: [0x00,0x80,0x1c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_and v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xe4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and v[3:4], v5, off ; encoding: [0x00,0x80,0x20,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_or v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xe8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or v[3:4], v5, off ; encoding: [0x00,0x80,0x24,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_xor v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xec,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor v[3:4], v5, off ; encoding: [0x00,0x80,0x28,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_inc v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xf0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc v[3:4], v5, off ; encoding: [0x00,0x80,0x2c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_dec v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xf4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec v[3:4], v5, off ; encoding: [0x00,0x80,0x30,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_add_x2 v[3:4], v[5:6], off // GFX10: 
encoding: [0x00,0x80,0x48,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x88,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_sub_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x4c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x8c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smin_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x54,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x90,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umin_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x58,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x94,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smax_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x5c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x98,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umax_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x60,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x9c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_and_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x64,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off ; 
encoding: [0x00,0x80,0xa0,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_or_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x68,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa4,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_xor_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x6c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa8,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_inc_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x70,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xac,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_dec_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x74,0xdd,0x03,0x05,0x7d,0x00] @@ -490,42 +490,42 @@ global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 global_load_ubyte_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_ubyte_d16_hi v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7d,0x01] 
// GFX9: global_load_sbyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte_d16_hi v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_short_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_short_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_short_d16_hi v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_short_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_byte_d16_hi v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_short_d16_hi v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_short_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_add v0, v[1:2], v2, off glc slc // GFX10: global_atomic_add v0, v[1:2], v2, off glc slc ; encoding: [0x00,0x80,0xcb,0xdc,0x01,0x02,0x7d,0x00] diff --git a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s 
b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s index c0e1670a6bd4f..fb795105419ce 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s +++ b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s @@ -1,14 +1,14 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s scratch_load_ubyte v1, v2, off // GFX10: encoding: [0x00,0x40,0x20,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte v1, v2, off ; encoding: [0x00,0x40,0x40,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ubyte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x20,0xdc,0x02,0x00,0x7d,0x01] @@ -18,7 +18,7 @@ scratch_load_ubyte v1, v2, off dlc scratch_load_sbyte v1, v2, off // GFX10: encoding: [0x00,0x40,0x24,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte v1, v2, off ; encoding: [0x00,0x40,0x44,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte v1, v2, 
off dlc // GFX10: encoding: [0x00,0x50,0x24,0xdc,0x02,0x00,0x7d,0x01] @@ -28,7 +28,7 @@ scratch_load_sbyte v1, v2, off dlc scratch_load_ushort v1, v2, off // GFX10: encoding: [0x00,0x40,0x28,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ushort v1, v2, off ; encoding: [0x00,0x40,0x48,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ushort v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x28,0xdc,0x02,0x00,0x7d,0x01] @@ -38,7 +38,7 @@ scratch_load_ushort v1, v2, off dlc scratch_load_sshort v1, v2, off // GFX10: encoding: [0x00,0x40,0x2c,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sshort v1, v2, off ; encoding: [0x00,0x40,0x4c,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sshort v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x2c,0xdc,0x02,0x00,0x7d,0x01] @@ -48,7 +48,7 @@ scratch_load_sshort v1, v2, off dlc scratch_load_dword v1, v2, off // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x30,0xdc,0x02,0x00,0x7d,0x01] @@ -58,7 +58,7 @@ scratch_load_dword v1, v2, off dlc scratch_load_dwordx2 v[1:2], v3, off // GFX10: encoding: [0x00,0x40,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx2 v[1:2], v3, off ; encoding: [0x00,0x40,0x54,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx2 v[1:2], v3, off dlc // GFX10: encoding: [0x00,0x50,0x34,0xdc,0x03,0x00,0x7d,0x01] @@ -68,7 +68,7 @@ scratch_load_dwordx2 v[1:2], v3, off dlc scratch_load_dwordx3 v[1:3], v4, off // GFX10: encoding: 
[0x00,0x40,0x3c,0xdc,0x04,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx3 v[1:3], v4, off ; encoding: [0x00,0x40,0x58,0xdc,0x04,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx3 v[1:3], v4, off dlc // GFX10: encoding: [0x00,0x50,0x3c,0xdc,0x04,0x00,0x7d,0x01] @@ -78,7 +78,7 @@ scratch_load_dwordx3 v[1:3], v4, off dlc scratch_load_dwordx4 v[1:4], v5, off // GFX10: encoding: [0x00,0x40,0x38,0xdc,0x05,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx4 v[1:4], v5, off ; encoding: [0x00,0x40,0x5c,0xdc,0x05,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx4 v[1:4], v5, off dlc // GFX10: encoding: [0x00,0x50,0x38,0xdc,0x05,0x00,0x7d,0x01] @@ -138,7 +138,7 @@ scratch_load_dword v255, off, s0 offset:2048 scratch_store_byte v1, v2, off // GFX10: encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_byte v1, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_byte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x60,0xdc,0x01,0x02,0x7d,0x00] @@ -148,7 +148,7 @@ scratch_store_byte v1, v2, off dlc scratch_store_short v1, v2, off // GFX10: encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_short v1, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_short v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x68,0xdc,0x01,0x02,0x7d,0x00] @@ -158,7 +158,7 @@ scratch_store_short v1, v2, off dlc scratch_store_dword v1, v2, off // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dword v1, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not 
supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x70,0xdc,0x01,0x02,0x7d,0x00] @@ -168,7 +168,7 @@ scratch_store_dword v1, v2, off dlc scratch_store_dwordx2 v1, v[2:3], off // GFX10: encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx2 v1, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx2 v1, v[2:3], off dlc // GFX10: encoding: [0x00,0x50,0x74,0xdc,0x01,0x02,0x7d,0x00] @@ -178,7 +178,7 @@ scratch_store_dwordx2 v1, v[2:3], off dlc scratch_store_dwordx3 v1, v[2:4], off // GFX10: encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx3 v1, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx3 v1, v[2:4], off dlc // GFX10: encoding: [0x00,0x50,0x7c,0xdc,0x01,0x02,0x7d,0x00] @@ -188,7 +188,7 @@ scratch_store_dwordx3 v1, v[2:4], off dlc scratch_store_dwordx4 v1, v[2:5], off // GFX10: encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx4 v1, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx4 v1, v[2:5], off dlc // GFX10: encoding: [0x00,0x50,0x78,0xdc,0x01,0x02,0x7d,0x00] @@ -203,7 +203,7 @@ scratch_store_dword v1, v2, off offset:12 scratch_load_dword v1, off, s1 // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] // GFX9: scratch_load_dword v1, off, s1 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, s1 offset:32 // GFX10: 
encoding: [0x20,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] @@ -213,7 +213,7 @@ scratch_load_dword v1, off, s1 offset:32 scratch_store_dword off, v2, s1 // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_dword off, v2, s1 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, s1 offset:12 // GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] @@ -254,59 +254,59 @@ scratch_store_dword off, v2, exec_hi scratch_load_dword v1, off, exec_lo // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7e,0x01] // GFX9: scratch_load_dword v1, off, exec_lo ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7e,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, exec_lo // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] // GFX9: scratch_store_dword off, v2, exec_lo ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, m0 // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7c,0x01] // GFX9: scratch_load_dword v1, off, m0 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7c,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, m0 // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] // GFX9: scratch_store_dword off, v2, m0 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ubyte_d16 v1, v2, off // GFX10: encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not 
supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ubyte_d16_hi v1, v2, off // GFX10: encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte_d16 v1, v2, off // GFX10: encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte_d16_hi v1, v2, off // GFX10: encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_short_d16 v1, v2, off // GFX10: encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_short_d16 v1, v2, off ; encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_short_d16_hi v1, v2, off // GFX10: encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_short_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_byte_d16_hi off, v2, s1 // GFX10: encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_byte_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_short_d16_hi off, v2, s1 // GFX10: encoding: 
[0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_short_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/flat-scratch.s b/llvm/test/MC/AMDGPU/flat-scratch.s index e87f59dafeeae..eea2f0d07f3ea 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch.s +++ b/llvm/test/MC/AMDGPU/flat-scratch.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOCI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOCI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck -check-prefix=CI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s diff --git a/llvm/test/MC/AMDGPU/flat.s b/llvm/test/MC/AMDGPU/flat.s index 8351233e466b6..bfb71c9ebf4d2 100644 --- a/llvm/test/MC/AMDGPU/flat.s +++ b/llvm/test/MC/AMDGPU/flat.s @@ -7,44 +7,44 @@ // error: instruction not supported on this GPU // -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// 
RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Operands //===----------------------------------------------------------------------===// flat_load_dword v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] glc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] glc slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01] flat_store_dword v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 glc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CIVI: flat_store_dword v[3:4], v1 glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 glc slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CIVI: flat_store_dword v[3:4], v1 slc ; encoding: 
[0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00] // FIXME: For atomic instructions, glc must be placed immediately following @@ -53,12 +53,12 @@ flat_store_dword v[3:4], v1 slc // flat_atomic_add v1, v[3:4], v5 slc glc flat_atomic_add v1, v[3:4], v5 offset:0 glc slc -// NOSI: error: +// NOSI: error: not a valid operand. // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add v[3:4], v5 slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00] @@ -67,367 +67,367 @@ flat_atomic_add v[3:4], v5 slc //===----------------------------------------------------------------------===// flat_load_ubyte v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x20,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x40,0xdc,0x03,0x00,0x00,0x01] flat_load_sbyte v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x24,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x44,0xdc,0x03,0x00,0x00,0x01] flat_load_ushort v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x28,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x48,0xdc,0x03,0x00,0x00,0x01] flat_load_sshort v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_sshort v1, v[3:4] ; encoding: [0x00,0x00,0x2c,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_sshort v1, v[3:4] ; encoding: 
[0x00,0x00,0x4c,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01] flat_load_dwordx2 v[1:2], v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x34,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x54,0xdc,0x03,0x00,0x00,0x01] flat_load_dwordx4 v[5:8], v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x38,0xdc,0x03,0x00,0x00,0x05] // VI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x5c,0xdc,0x03,0x00,0x00,0x05] flat_load_dwordx3 v[5:7], v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x3c,0xdc,0x03,0x00,0x00,0x05] // VI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x58,0xdc,0x03,0x00,0x00,0x05] flat_store_byte v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_byte v[3:4], v1 ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00] flat_store_short v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_short v[3:4], v1 ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] flat_store_dwordx2 v[3:4], v[1:2] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dwordx2 v[3:4], v[1:2] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00] flat_store_dwordx4 v[3:4], 
v[5:8] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] // VI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] flat_store_dwordx3 v[3:4], v[5:7] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] // VI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] flat_atomic_swap v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0xc0,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0x00,0xdd,0x03,0x05,0x00,0x00] flat_atomic_swap v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc1,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x01,0xdd,0x03,0x05,0x00,0x01] flat_atomic_cmpswap v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xc4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0x04,0xdd,0x03,0x05,0x00,0x00] flat_atomic_cmpswap v1, v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xc5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x05,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add v[3:4], v5 ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_add v[3:4], v5 ; encoding: 
[0x00,0x00,0x08,0xdd,0x03,0x05,0x00,0x00] flat_atomic_add v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x09,0xdd,0x03,0x05,0x00,0x01] flat_atomic_sub v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0xcc,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0x0c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_sub v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xcd,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x0d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smin v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0xd4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0x10,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smin v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x11,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umin v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0xd8,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0x14,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umin v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd9,0xdc,0x03,0x05,0x00,0x01] // VI: 
flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x15,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smax v[3:4], v5, -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0xdc,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0x18,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smax v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xdd,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x19,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umax v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0xe0,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0x1c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umax v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe1,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x1d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_and v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0xe4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0x20,0xdd,0x03,0x05,0x00,0x00] flat_atomic_and v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x21,0xdd,0x03,0x05,0x00,0x01] flat_atomic_or v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or v[3:4], v5 ; encoding: 
[0x00,0x00,0xe8,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_or v[3:4], v5 ; encoding: [0x00,0x00,0x24,0xdd,0x03,0x05,0x00,0x00] flat_atomic_or v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe9,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x25,0xdd,0x03,0x05,0x00,0x01] flat_atomic_xor v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0xec,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0x28,0xdd,0x03,0x05,0x00,0x00] flat_atomic_xor v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xed,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x29,0xdd,0x03,0x05,0x00,0x01] flat_atomic_inc v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0xf0,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0x2c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_inc v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xf1,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x2d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_dec v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0xf4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0x30,0xdd,0x03,0x05,0x00,0x00] flat_atomic_dec v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec v1, v[3:4], v5 glc ; 
encoding: [0x00,0x00,0xf5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_dec v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x31,0xdd,0x03,0x05,0x00,0x01] flat_atomic_fcmpswap v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xf8,0xdc,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fcmpswap v1, v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xf9,0xdc,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_swap_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x40,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x80,0xdd,0x03,0x05,0x00,0x00] flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x41,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x81,0xdd,0x03,0x05,0x00,0x01] flat_atomic_cmpswap_x2 v[3:4], v[5:8] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x44,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x84,0xdd,0x03,0x05,0x00,0x00] flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x45,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: 
[0x00,0x00,0x85,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x48,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x88,0xdd,0x03,0x05,0x00,0x00] flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x49,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x89,0xdd,0x03,0x05,0x00,0x01] flat_atomic_sub_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x4c,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x8c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x4d,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x8d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smin_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x54,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x90,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x55,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x91,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umin_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: 
error: instruction not supported on this GPU // CI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x58,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x94,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x59,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x95,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smax_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x5c,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x98,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x5d,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x99,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umax_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x60,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x9c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x61,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x9d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_and_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: 
[0x00,0x00,0x64,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa0,0xdd,0x03,0x05,0x00,0x00] flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x65,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa1,0xdd,0x03,0x05,0x00,0x01] flat_atomic_or_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x68,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa4,0xdd,0x03,0x05,0x00,0x00] flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x69,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa5,0xdd,0x03,0x05,0x00,0x01] flat_atomic_xor_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x6c,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa8,0xdd,0x03,0x05,0x00,0x00] flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x6d,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa9,0xdd,0x03,0x05,0x00,0x01] flat_atomic_inc_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x70,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: 
[0x00,0x00,0xac,0xdd,0x03,0x05,0x00,0x00] flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x71,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xad,0xdd,0x03,0x05,0x00,0x01] flat_atomic_dec_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x74,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xb0,0xdd,0x03,0x05,0x00,0x00] flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x75,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xb1,0xdd,0x03,0x05,0x00,0x01] flat_atomic_fcmpswap_x2 v[3:4], v[5:8] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdd,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fcmpswap_x2 v[1:2], v[3:4], v[5:8] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x79,0xdd,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmin_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fmin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x7c,0xdd,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmin_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: 
flat_atomic_fmin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x7d,0xdd,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmax_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fmax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x80,0xdd,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x81,0xdd,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/fma-mix.s b/llvm/test/MC/AMDGPU/fma-mix.s index 9d4c762bef557..3f510090ee58e 100644 --- a/llvm/test/MC/AMDGPU/fma-mix.s +++ b/llvm/test/MC/AMDGPU/fma-mix.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx904 -show-encoding %s | FileCheck -check-prefix=GFX9-FMAMIX %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s | FileCheck -check-prefix=GFX9-FMAMIX %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-MADMIX-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9-MADMIX-ERR --implicit-check-not=error: %s v_fma_mix_f32 v0, v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] @@ -20,45 +20,57 @@ v_fma_mixhi_f16 v0, v1, v2, v3 v_fma_mix_f32 v0, abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. 
-// FIXME: Better error -// GFX9-MADMIX-ERR: error: invalid operand for instruction +// FIXME: Improve error messages v_fma_mix_f32 v0, v1, abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, abs(v3) // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, -v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, -v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, -v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, -abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, -abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, -abs(v3) // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) // GFX9-FMAMIX: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3) // GFX9-FMAMIX: v_fma_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4] +// GFX9-MADMIX-ERR: error: not a valid operand. 
v_fma_mixlo_f16 v0, v1, v2, v3 clamp // GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: invalid operand for instruction v_fma_mixhi_f16 v0, v1, v2, v3 clamp // GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: invalid operand for instruction // // op_sel with non-packed instructions @@ -66,38 +78,50 @@ v_fma_mixhi_f16 v0, v1, v2, v3 clamp v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// FIXME: Better error -// GFX-MADMIX-ERR: error: unknown token in expression +// GFX9-MADMIX-ERR: error: not a valid operand. + +// FIXME: Improve error messages v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. 
v_fma_mix_f32 v0, v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-MADMIX-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx10-constant-bus.s b/llvm/test/MC/AMDGPU/gfx10-constant-bus.s index d2034ae1354b1..37e853c87be79 100644 --- a/llvm/test/MC/AMDGPU/gfx10-constant-bus.s +++ b/llvm/test/MC/AMDGPU/gfx10-constant-bus.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s //----------------------------------------------------------------------------------------- // On GFX10 we can use two scalar operands (except for 64-bit shift instructions) diff --git a/llvm/test/MC/AMDGPU/gfx1011_err.s b/llvm/test/MC/AMDGPU/gfx1011_err.s index e99716018c051..81c8c6254c037 100644 --- a/llvm/test/MC/AMDGPU/gfx1011_err.s +++ b/llvm/test/MC/AMDGPU/gfx1011_err.s @@ -1,50 +1,50 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s v_dot8c_i32_i4 v5, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. 
v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES) -// GFX10: error: +// GFX10: error: specified hardware register is not supported on this GPU v_fma_legacy_f32 v0, v1, v2, v3 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] -// GFX10: error: +// GFX10: error: invalid instruction image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 -// GFX10: error: +// GFX10: error: invalid instruction image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] -// GFX10: error: +// GFX10: error: invalid instruction image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 -// GFX10: error: +// GFX10: error: invalid instruction image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16 -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY -// GFX10: error: +// GFX10: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index 29d906ec838be..b8e1afdfdb5b0 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -1,140 +1,140 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1031 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1031 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s v_dot8c_i32_i4 v5, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. 
s_get_waveid_in_workgroup s0 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU s_memtime s[0:1] -// GFX10: error: +// GFX10: error: instruction not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) -// GFX10: error: +// GFX10: error: specified hardware register is not supported on this GPU v_mac_f32 v0, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_mad_f32 v0, v1, v2, v3 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_madak_f32 v0, v1, v2, 1 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_madmk_f32 v0, v1, 1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_mad_legacy_f32 v0, v1, v2, v3 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_mac_legacy_f32 v0, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU ds_add_src2_u32 v1 offset:65535 gds -// GFX10: error: +// GFX10: error: not a valid operand. ds_add_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_add_src2_f32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_sub_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_rsub_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_inc_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_dec_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_i32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_i32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. 
ds_and_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_or_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_xor_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_f32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_f32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_add_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_sub_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_rsub_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_inc_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_dec_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_i64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_i64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_and_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_or_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_xor_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_f64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_f64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_write_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_write_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s index d1bbde6539417..59c49220111ce 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_DS. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s index ce3cef52e8994..01159c365ebc2 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX10: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s index b8ede28ec0763..e6985532bd1a1 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck 
--check-prefixes=GFX10-ERR,W32-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s index 251dde827b71c..978ec345f2b05 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s @@ -1,9 +1,9 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx601 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx701 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx801 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-8,GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck 
--check-prefixes=GFX6-8,GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10 --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_DS. @@ -124,6 +124,7 @@ s_bitreplicate_b64_b32 s[0:1], s2 s_set_gpr_idx_idx s0 // GFX10: error: instruction not supported on this GPU +// GFX6-7: error: instruction not supported on this GPU // GFX6, GFX7, GFX8, GFX9. @@ -167,6 +168,7 @@ s_pack_hh_b32_b16 s0, s1, s2 s_rfe_restore_b64 s[0:1], s2 // GFX10: error: instruction not supported on this GPU +// GFX6-7: error: instruction not supported on this GPU // GFX6, GFX7, GFX8, GFX9. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s index 3a33ed4b8a608..f99a295362369 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=NOGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOGFX10 --implicit-check-not=error: %s ; TODO: more helpful error message for missing dim operand image_load v[0:3], v0, s[0:7] dmask:0xf unorm diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_all.s b/llvm/test/MC/AMDGPU/gfx8_asm_all.s index 1610bfa7d92a9..1362a4f871b2d 100644 --- a/llvm/test/MC/AMDGPU/gfx8_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx8_asm_all.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=CHECK-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=CHECK-ERR --implicit-check-not=error: %s ds_add_u32 v1, v2 offset:65535 // CHECK: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9-asm-err.s b/llvm/test/MC/AMDGPU/gfx9-asm-err.s index ff63cdf2a35a7..de0930cf952a2 100644 --- a/llvm/test/MC/AMDGPU/gfx9-asm-err.s +++ b/llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9ERR --implicit-check-not=error: %s v_cvt_f16_u16_e64 v5, 0.5 // GFX9ERR: error: invalid literal operand diff --git a/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s b/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s index f996c4e3c0a7e..d1c7a759385a9 100644 --- a/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s +++ b/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -arch=amdgcn 
-mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX9-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9-ERR --implicit-check-not=error: %s v_addc_co_u32_e32 v3, vcc, 12345, v3, vcc // GFX9-ERR: error: invalid operand (violates constant bus restrictions) diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_all.s b/llvm/test/MC/AMDGPU/gfx9_asm_all.s index b3b8bf86a131b..93050e4c292b6 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_all.s @@ -1,6 +1,6 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits -show-encoding %s | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits %s 2>&1 | FileCheck -check-prefix=CHECK-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits %s 2>&1 | FileCheck -check-prefix=CHECK-ERR --implicit-check-not=error: %s ds_add_u32 v1, v2 offset:65535 // CHECK: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s index 546ed25a60ebc..14800de71cbd2 100644 --- a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s +++ b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s @@ -10,10 +10,6 @@ v2, v4, v6 # CHECK-NEXT: v2, v4, v6 # CHECK-NEXT: ^ -# CHECK: error: failed parsing operand -# CHECK-NEXT: v2, v4, v6 -# CHECK-NEXT: ^ - # We don't want to see a suggestion here; the edit distance is too large to # give sensible suggestions: diff --git a/llvm/test/MC/AMDGPU/lds_direct-err.s b/llvm/test/MC/AMDGPU/lds_direct-err.s index 578461bc35ad5..3e5bfe48ca0a0 100644 --- a/llvm/test/MC/AMDGPU/lds_direct-err.s +++ b/llvm/test/MC/AMDGPU/lds_direct-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc 
-arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //---------------------------------------------------------------------------// // lds_direct may be used only with vector ALU instructions diff --git a/llvm/test/MC/AMDGPU/lds_direct-gfx10.s b/llvm/test/MC/AMDGPU/lds_direct-gfx10.s index f3174553e5793..61e4de3e46914 100644 --- a/llvm/test/MC/AMDGPU/lds_direct-gfx10.s +++ b/llvm/test/MC/AMDGPU/lds_direct-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX10 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck %s --check-prefix=NOGFX10 --implicit-check-not=error: v_readfirstlane_b32 s0, lds_direct // GFX10: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] diff --git a/llvm/test/MC/AMDGPU/literal16-err.s b/llvm/test/MC/AMDGPU/literal16-err.s index f9b3d40f84d94..6a2f1eb23a474 100644 --- a/llvm/test/MC/AMDGPU/literal16-err.s +++ b/llvm/test/MC/AMDGPU/literal16-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s v_add_f16 v1, 0xfffff, v2 // NOVI: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index dd9d2903a1ae2..b666b7d1cb780 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -4,11 +4,11 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=GFX89 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=GFX89 --check-prefix=GFX9 -// RUN: not llvm-mc -arch=amdgcn 
-show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICI --check-prefix=NOCIVI --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOGFX89 --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICI --check-prefix=NOCIVI --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOGFX89 --check-prefix=NOGFX9 --implicit-check-not=error: //---------------------------------------------------------------------------// // fp literal, expected fp operand @@ -640,132 +640,133 @@ v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD // named inline values: shared_base, shared_limit, private_base, etc 
//---------------------------------------------------------------------------// -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb] buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] s_add_i32 s0, src_shared_base, s0 -// NOSICIVI: error: failed parsing operand. + + + + + + +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] s_add_i32 s0, src_shared_limit, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] s_add_i32 s0, src_private_base, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] s_add_i32 s0, src_private_limit, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81] s_add_i32 s0, src_pops_exiting_wave_id, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_limit -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_limit -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c] v_add_u16 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06] v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86] v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68] v_add_u32 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00] v_add_u32_e64 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d] v_cmp_eq_i64 vcc, src_shared_base, v[0:1] -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a] v_max_f16 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16] v_max_f32 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] v_max_f64 v[0:1], src_shared_base, v[0:1] -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18] v_pk_add_f16 v0, src_shared_base, v0 -// NOSICI: error: not a valid operand -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f16 v0, neg(src_shared_base) -// NOSICI: error: not a valid operand -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f16 v0, abs(src_shared_base) -// NOSOCIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f64 v[5:6], |src_shared_base| -// NOSI: error: not a valid operand -// NOCIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f64 v[5:6], -src_shared_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f32 v0, -src_shared_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f32 v0, |src_shared_base| -// NOSICI: error: not a valid operand. -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICI: error: not a valid operand. -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00] v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00] v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD @@ -773,7 +774,7 @@ v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD // named inline values compete with other scalars for constant bus access //---------------------------------------------------------------------------// -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_add_u32 v0, private_base, s0 @@ -782,17 +783,17 @@ v_add_u32 v0, private_base, s0 v_add_u32 v0, scc, s0 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, shared_base, v0, v1 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, shared_limit, v1 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, v1, private_limit @@ -809,29 +810,29 @@ v_div_fmas_f32 v0, v0, scc, v1 v_div_fmas_f32 v0, v0, v1, vccz // v_addc_co_u32 implicitly reads VCC (VOP2) -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_addc_co_u32 v0, vcc, shared_base, v0, vcc -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, shared_base, v0, 0x11213141 // NOGCN: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, scc, v0, 0x11213141 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, private_limit -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, s0 // NOGCN: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], execz, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, private_base, private_limit diff --git a/llvm/test/MC/AMDGPU/literalv216-err.s b/llvm/test/MC/AMDGPU/literalv216-err.s index 3a1c49b136fd2..eefe1b343c3a1 100644 --- a/llvm/test/MC/AMDGPU/literalv216-err.s +++ b/llvm/test/MC/AMDGPU/literalv216-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10 --implicit-check-not=error: %s v_pk_add_f16 v1, -17, v2 // GFX9: error: invalid literal operand @@ -38,12 +38,9 @@ v_pk_mad_i16 v5, 0x3c00, 0x4000, 2 v_pk_mad_i16 v5, 0x3c00, 3, 2 // GFX9: error: invalid literal operand -// GFX10-NOT: error: v_pk_mad_i16 v5, 3, 0x3c00, 2 // GFX9: error: invalid literal operand -// GFX10-NOT: error: v_pk_mad_i16 v5, 3, 2, 0x3c00 // GFX9: error: 
invalid literal operand -// GFX10-NOT: error: diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s index 9bcc1341774a0..ac05c280f0490 100644 --- a/llvm/test/MC/AMDGPU/literalv216.s +++ b/llvm/test/MC/AMDGPU/literalv216.s @@ -1,8 +1,8 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX9 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX10 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s -check-prefix=NOGFX9 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck %s -check-prefix=NOGFX10 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Inline constants diff --git a/llvm/test/MC/AMDGPU/mad-mix.s b/llvm/test/MC/AMDGPU/mad-mix.s index 539de050f4d75..0a261a922725d 100644 --- a/llvm/test/MC/AMDGPU/mad-mix.s +++ b/llvm/test/MC/AMDGPU/mad-mix.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9-MADMIX %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx904 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR --implicit-check-not=error: %s v_mad_mix_f32 v0, v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] @@ -20,45 +20,57 @@ v_mad_mixhi_f16 v0, v1, v2, v3 
v_mad_mix_f32 v0, abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. -// FIXME: Better error -// GFX9-FMAMIX-ERR: error: invalid operand for instruction +// FIXME: Improve diagnistics v_mad_mix_f32 v0, v1, abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, abs(v3) // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, -v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, -v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, -v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, -abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, -abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, -abs(v3) // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3) // GFX9-MADMIX: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-FMAMIX-ERR: error: not a valid operand. 
v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3) // GFX9-MADMIX: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixlo_f16 v0, v1, v2, v3 clamp // GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: invalid operand for instruction v_mad_mixhi_f16 v0, v1, v2, v3 clamp // GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: invalid operand for instruction // // op_sel with non-packed instructions @@ -66,38 +78,50 @@ v_mad_mixhi_f16 v0, v1, v2, v3 clamp v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// FIXME: Better error -// GFX-FMAMIX-ERR: error: unknown token in expression +// GFX9-FMAMIX-ERR: error: not a valid operand. + +// FIXME: Improve diagnistics v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. 
v_mad_mix_f32 v0, v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-FMAMIX-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/mai-err.s b/llvm/test/MC/AMDGPU/mai-err.s index 9b9b733428e48..6f3361c0c9f3d 100644 --- a/llvm/test/MC/AMDGPU/mai-err.s +++ b/llvm/test/MC/AMDGPU/mai-err.s @@ -1,527 +1,700 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=GFX908 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX900 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=GFX908 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX900 --implicit-check-not=error: %s v_accvgpr_read_b32 v0, v0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 a0, a0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 v0, 1 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 v0, s0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 v0, a0 // GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 v0, v0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, a0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, s0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, 65 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, v0 // GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 v[0:31], v0, v1, a[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 
v[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], s0, v1, a[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 65 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 0 // GFX900: error: instruction not supported on this GPU +// GFX908: error: invalid literal operand v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/mai.s b/llvm/test/MC/AMDGPU/mai.s index 09eddb0d258c8..c02139a616fc4 100644 --- a/llvm/test/MC/AMDGPU/mai.s +++ b/llvm/test/MC/AMDGPU/mai.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck -check-prefix=GFX908 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOGFX908 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=NOGFX908 --implicit-check-not=error: %s v_accvgpr_read_b32 v2, a0 // GFX908: v_accvgpr_read_b32 v2, a0 ; encoding: [0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08] diff --git a/llvm/test/MC/AMDGPU/mimg-err.s b/llvm/test/MC/AMDGPU/mimg-err.s index 822ffdd65351e..9c8a9c8abf643 100644 --- a/llvm/test/MC/AMDGPU/mimg-err.s +++ b/llvm/test/MC/AMDGPU/mimg-err.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: //===----------------------------------------------------------------------===// // Image Load/Store diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s index 83835270a1d4a..403ee7d62cc01 100644 --- 
a/llvm/test/MC/AMDGPU/mimg.s +++ b/llvm/test/MC/AMDGPU/mimg.s @@ -5,12 +5,12 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_1 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_1 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Image Load/Store @@ -201,7 +201,7 @@ image_store v[5:8], v[1:2], s[8:15] dmask:0xf unorm a16 
// NOSICI: error: a16 modifier is not supported on this GPU // NOVI: error: a16 modifier is not supported on this GPU -/===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // Image Load/Store: a16 & d16 //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/mtbuf-gfx10.s b/llvm/test/MC/AMDGPU/mtbuf-gfx10.s index 8ea86e7de9657..2fdad57b19295 100644 --- a/llvm/test/MC/AMDGPU/mtbuf-gfx10.s +++ b/llvm/test/MC/AMDGPU/mtbuf-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Positive tests for legacy format syntax. 
diff --git a/llvm/test/MC/AMDGPU/mtbuf.s b/llvm/test/MC/AMDGPU/mtbuf.s index f7fdd29bb83b8..0653b591d69d7 100644 --- a/llvm/test/MC/AMDGPU/mtbuf.s +++ b/llvm/test/MC/AMDGPU/mtbuf.s @@ -2,9 +2,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,VI-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,VI-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Positive tests for legacy dfmt/nfmt syntax. 
diff --git a/llvm/test/MC/AMDGPU/mubuf-gfx9.s b/llvm/test/MC/AMDGPU/mubuf-gfx9.s index d9c3fc39cfd8d..10909c63aff7a 100644 --- a/llvm/test/MC/AMDGPU/mubuf-gfx9.s +++ b/llvm/test/MC/AMDGPU/mubuf-gfx9.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s buffer_load_ubyte_d16 v1, off, s[4:7], s1 // VI-ERR: error: instruction not supported on this GPU @@ -39,23 +39,23 @@ buffer_load_format_d16_hi_x v5, off, s[8:11], s3 buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 idxen offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 offen offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 glc // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. 
buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 slc // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9a,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v255, off, s[12:15], s4 // GFX9: buffer_store_format_d16_hi_x v255, off, s[12:15], s4 ; encoding: [0x00,0x00,0x9c,0xe0,0x00,0xff,0x03,0x04] @@ -63,20 +63,20 @@ buffer_store_format_d16_hi_x v255, off, s[12:15], s4 buffer_store_format_d16_hi_x v255, off, s[12:15], s4 offset:4095 // GFX9: buffer_store_format_d16_hi_x v255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe0,0x00,0xff,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 idxen offset:4095 // GFX9: buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 offen offset:4095 // GFX9: buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc // GFX9: buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 slc // GFX9: buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x9e,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/mubuf.s b/llvm/test/MC/AMDGPU/mubuf.s index 6c0fdb1408282..a07a0a2aab180 100644 --- a/llvm/test/MC/AMDGPU/mubuf.s +++ b/llvm/test/MC/AMDGPU/mubuf.s @@ -2,9 +2,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI -check-prefix=NOSICIVI -check-prefix=NOSICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI -check-prefix=NOSICIVI -check-prefix=NOSICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI -check-prefix=NOSICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI -check-prefix=NOSICIVI -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI -check-prefix=NOSICIVI -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI -check-prefix=NOSICIVI --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Test for different operand combinations diff --git a/llvm/test/MC/AMDGPU/out-of-range-registers.s b/llvm/test/MC/AMDGPU/out-of-range-registers.s index 53e0f65f0cb15..c7cd03470f9fc 100644 --- a/llvm/test/MC/AMDGPU/out-of-range-registers.s +++ b/llvm/test/MC/AMDGPU/out-of-range-registers.s @@ -1,12 +1,12 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,SI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,CIVI9-ERR %s -// RUN: not llvm-mc -arch=amdgcn 
-mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX9-ERR,SICIVI9-ERR,CIVI9-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,SI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,CIVI9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX9-ERR,SICIVI9-ERR,CIVI9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX10-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=SIVICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=SIVICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SIVICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=SIVICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s s_add_i32 s106, s0, s1 // GCN-ERR: error: not a valid operand @@ -84,21 +84,25 @@ s_mov_b32 ttmp12, 0 // SICIVI: error: not a valid operand // GFX9: s_mov_b32 ttmp12, 0 ; encoding: // GFX10: s_mov_b32 ttmp12, 0 ; encoding: +// SIVICI-ERR: error: not a valid operand. 
s_mov_b32 ttmp15, 0 // SICIVI: error: not a valid operand // GFX9: s_mov_b32 ttmp15, 0 ; encoding: // GFX10: s_mov_b32 ttmp15, 0 ; encoding: +// SIVICI-ERR: error: not a valid operand. s_mov_b32 flat_scratch_lo, 0 // SI-ERR: error: not a valid operand // CIVI9: s_mov_b32 flat_scratch_lo, 0 ; encoding: // GFX10-ERR: error: not a valid operand +// GFX9: s_mov_b32 flat_scratch_lo, 0 ; encoding: [0x80,0x00,0xe6,0xbe] s_mov_b32 flat_scratch_hi, 0 // SI-ERR: error: not a valid operand // CIVI9: s_mov_b32 flat_scratch_hi, 0 ; encoding: // GFX10-ERR: error: not a valid operand +// GFX9: s_mov_b32 flat_scratch_hi, 0 ; encoding: [0x80,0x00,0xe7,0xbe] s_mov_b32 tma_lo, 0 // SIVICI: s_mov_b32 tma_lo, 0 ; encoding: diff --git a/llvm/test/MC/AMDGPU/reg-syntax-err.s b/llvm/test/MC/AMDGPU/reg-syntax-err.s index 8d58630ce8885..dce9375a47111 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-err.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-err.s @@ -1,73 +1,73 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s s_mov_b32 s1, s 1 // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, s[0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0:1] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s0, 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
s_mov_b32 s1, s999 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[1:2] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:2] 1 // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, xnack_mask_lo 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s s0 // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, s[0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0:1] s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s0, s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s999 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[1:2] s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:2] vcc_lo // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, xnack_mask_lo s1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
exp mrt0 v1, v2, v3, v4000 off -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. v_add_f64 v[0:1], v[0:1], v[0xF00000001:0x2] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. v_add_f64 v[0:1], v[0:1], v[0x1:0xF00000002] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:-1] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/reg-syntax-extra.s b/llvm/test/MC/AMDGPU/reg-syntax-extra.s index 4e8216c88d67c..528247f562399 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-extra.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-extra.s @@ -1,48 +1,61 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s + +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=NOGCN --check-prefix=NOVI 
--implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=NOGCN --check-prefix=NOGFX10 --implicit-check-not=error: %s s_mov_b32 [ttmp5], [ttmp3] // SICI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x03,0xf5,0xbe] // VI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x00,0xf5,0xbe] +// GFX10: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x6f,0x03,0xf1,0xbe] s_mov_b64 [ttmp4,ttmp5], [ttmp2,ttmp3] // SICI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x04,0xf4,0xbe] // VI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x01,0xf4,0xbe] +// GFX10: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x6e,0x04,0xf0,0xbe] s_mov_b64 ttmp[4:5], ttmp[2:3] // SICI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x04,0xf4,0xbe] // VI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x01,0xf4,0xbe] +// GFX10: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x6e,0x04,0xf0,0xbe] s_mov_b64 [s6,s7], s[8:9] // SICI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] // VI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x01,0x86,0xbe] +// GFX10: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] s_mov_b64 s[6:7], [s8,s9] // SICI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] // VI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x01,0x86,0xbe] +// GFX10: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] s_mov_b64 [exec_lo,exec_hi], s[2:3] // SICI: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x04,0xfe,0xbe] // VI: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x01,0xfe,0xbe] +// GFX10: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x04,0xfe,0xbe] s_mov_b64 [flat_scratch_lo,flat_scratch_hi], s[2:3] -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: s_mov_b64 flat_scratch, s[2:3] ; encoding: [0x02,0x01,0xe6,0xbe] +// NOGFX10: error: not a valid operand. 
s_mov_b64 [vcc_lo,vcc_hi], s[2:3] // SICI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe] // VI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x01,0xea,0xbe] +// GFX10: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe] s_mov_b64 [tba_lo,tba_hi], s[2:3] // SICI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x04,0xec,0xbe] // VI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x01,0xec,0xbe] +// NOGFX10: error: not a valid operand. s_mov_b64 [tma_lo,tma_hi], s[2:3] // SICI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x04,0xee,0xbe] // VI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x01,0xee,0xbe] +// NOGFX10: error: not a valid operand. v_mov_b32_e32 [v1], [v2] // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] @@ -50,80 +63,109 @@ v_mov_b32_e32 [v1], [v2] v_rcp_f64 [v1,v2], [v2,v3] // SICI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e] // VI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x4b,0x02,0x7e] +// GFX10: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e] buffer_load_dwordx4 [v1,v2,v3,v4], off, [s4,s5,s6,s7], s1 // SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] // VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] +// GFX10: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] buffer_load_dword v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], s1 // SICI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01] // VI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01] +// GFX10: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1c,0x01] buffer_store_format_xyzw v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1 // SICI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71] // VI: buffer_store_format_xyzw v[1:4], off, 
ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71] +// GFX10: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1c,0x6d] buffer_load_ubyte v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1 // SICI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71] // VI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71] +// GFX10: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1c,0x6d] buffer_store_dwordx4 v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1 // SICI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71] // VI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71] +// GFX10: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1c,0x6d] s_load_dwordx4 [ttmp4,ttmp5,ttmp6,ttmp7], [ttmp2,ttmp3], ttmp4 // SICI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0] // VI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x37,0x1c,0x08,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dword ttmp1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4 // SICI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2] // VI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x78,0x1b,0x20,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dwordx4 [ttmp8,ttmp9,ttmp10,ttmp11], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2] // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00] +// 
GFX10: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x38,0x1d,0x28,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dwordx4 [ttmp[8],ttmp[8+1],ttmp[5*2],ttmp[(3+2)*2+1]], ttmp[45/11:(33+45)/11], ttmp4 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2] // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x38,0x1d,0x28,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dwordx4 ttmp[7+1:(3+2)*2+1], [ttmp[45/11],ttmp[5],ttmp6,ttmp[(33+45)/11]], ttmp4 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2] // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x38,0x1d,0x28,0xf4,0x00,0x00,0x00,0xe0] flat_load_dword v[8:8], v[2:3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dword v[63/8+1:65/8], v[2:3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dword v8, v[2*2-2:(3+7)/3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: 
instruction not supported on this GPU flat_load_dword v[63/8+1], v[2:3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dwordx4 v[8:11], v[2*2-2:(3*3-6)] // VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x38,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dwordx4 v[8/2+4:11/2+6], v[2:3] // VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x38,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dwordx4 [v[8/2+4],v9,v[10],v[11/2+6]], v[2:3] // VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x38,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU v_mul_f32 v0, null, v2 -// NOSICIVI: error: -// GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10] +// NOSICIVI: error: not a valid operand. +// GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10] +// NOVI: error: not a valid operand. v_mul_f64 v[0:1], null, null -// NOSICIVI: error: -// GFX10: v_mul_f64 v[0:1], null, null ; encoding: [0x00,0x00,0x65,0xd5,0x7d,0xfa,0x00,0x00] +// NOSICIVI: error: not a valid operand. +// GFX10: v_mul_f64 v[0:1], null, null ; encoding: [0x00,0x00,0x65,0xd5,0x7d,0xfa,0x00,0x00] +// NOVI: error: not a valid operand. 
s_add_u32 null, null, null -// NOSICIVI: error: -// GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80] +// NOSICIVI: error: not a valid operand. +// GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80] +// NOVI: error: not a valid operand. s_not_b64 s[2:3], null -// NOSICIVI: error: -// GFX10: s_not_b64 s[2:3], null ; encoding: [0x7d,0x08,0x82,0xbe] +// NOSICIVI: error: not a valid operand. +// GFX10: s_not_b64 s[2:3], null ; encoding: [0x7d,0x08,0x82,0xbe] +// NOVI: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/regression/bug28538.s b/llvm/test/MC/AMDGPU/regression/bug28538.s index 59fac226343d8..f9cdb157bbb11 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28538.s +++ b/llvm/test/MC/AMDGPU/regression/bug28538.s @@ -1,12 +1,12 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOCIVI --check-prefix=NOVI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOCIVI --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOVI: error: failed parsing operand v_mov_b32 v0, v0 row_bcast:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// NOVI: error: failed parsing operand v_mov_b32 v0, v0 row_bcast:13 diff --git a/llvm/test/MC/AMDGPU/smem-err.s b/llvm/test/MC/AMDGPU/smem-err.s index 83cfeb81b6eef..5f62318a1ac7b 100644 --- a/llvm/test/MC/AMDGPU/smem-err.s +++ b/llvm/test/MC/AMDGPU/smem-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s s_memtime exec // NOVI: :11: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index dcff79fef529e..4d81929b415e0 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -3,12 +3,12 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX1012 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 
-check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 --implicit-check-not=error: %s s_dcache_wb // GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] @@ -105,14 +105,17 @@ s_store_dword tma_hi, s[2:3], s4 s_load_dword s1, s[2:3], 0xfc glc // GFX89: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x01,0xf4,0xfc,0x00,0x00,0xfa] +// SICI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0xfc,0x83,0x00,0xc0] s_load_dword s1, s[2:3], s4 glc // GFX89: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0x08] +// SICI: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x04,0x82,0x00,0xc0] s_buffer_store_dword s10, s[92:95], m0 // GFX89: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU +// 
GFX10: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dword tba_lo, s[92:95], m0 // VI: s_buffer_store_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -138,14 +141,17 @@ s_buffer_store_dword ttmp0, s[92:95], m0 // VI: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x60,0xc0,0x7c,0x00,0x00,0x00] // GFX9: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU +// GFX10: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 s[10:11], s[92:95], m0 // GFX89: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU +// GFX10: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: invalid operand for instruction +// GFX10: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 tba, s[92:95], m0 glc // VI: s_buffer_store_dwordx2 tba, s[92:95], m0 glc ; encoding: [0x2e,0x1b,0x65,0xc0,0x7c,0x00,0x00,0x00] @@ -154,6 +160,8 @@ s_buffer_store_dwordx2 tba, s[92:95], m0 glc s_buffer_load_dword s10, s[92:95], m0 // GFX89: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xc0,0x7c,0x00,0x00,0x00] +// SICI: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] +// GFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xf4,0x00,0x00,0x00,0xf8] // SICIGFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] s_buffer_load_dword tba_lo, s[92:95], m0 @@ 
-207,6 +215,7 @@ s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xc0,0x7c,0x00,0x00,0x00] // GFX10: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xf4,0x00,0x00,0x00,0xf8] +// SICI: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x7c,0x5c,0x84,0xc2] //===----------------------------------------------------------------------===// // s_scratch instructions @@ -220,7 +229,7 @@ s_scratch_load_dword s5, s[2:3], s101 s_scratch_load_dword s5, s[2:3], s0 glc // GFX9: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction s_scratch_load_dwordx2 s[100:101], s[2:3], s0 // GFX9: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xc0,0x00,0x00,0x00,0x00] @@ -230,7 +239,7 @@ s_scratch_load_dwordx2 s[100:101], s[2:3], s0 s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc // GFX9: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x1b,0xc0,0x01,0x00,0x00,0x00] // GFX1012: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x19,0xf4,0x01,0x00,0x00,0xfa] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction s_scratch_load_dwordx4 s[20:23], s[4:5], s0 // GFX9: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xc0,0x00,0x00,0x00,0x00] @@ -245,17 +254,17 @@ s_scratch_store_dword s101, s[4:5], s0 s_scratch_store_dword s1, s[4:5], 0x123 glc // GFX9: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x57,0xc0,0x23,0x01,0x00,0x00] // GFX1012: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] -// NOSICIVI: error +// NOSICIVI: error: 
invalid operand for instruction s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc // GFX9: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xc0,0x65,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc // GFX9: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction //===----------------------------------------------------------------------===// // s_dcache_discard instructions @@ -288,162 +297,162 @@ s_dcache_discard_x2 s[2:3], 0x0 s_atomic_add s5, s[2:3], s101 // GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_add s5, s[2:3], 0x0 // GFX9: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_add s5, s[2:3], s0 glc // GFX9: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_add_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: 
[0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_and s5, s[2:3], s101 // GFX9: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_and_x2 s[10:11], s[2:3], 0x0 // GFX9: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa2,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa0,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], s101 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], 0x0 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], s0 glc // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xca] -// 
NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x86,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_dec s5, s[2:3], s0 glc // GFX9: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_dec_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_inc s5, s[2:3], s0 glc // GFX9: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_inc_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not 
supported on this GPU s_atomic_or s5, s[2:3], 0x0 // GFX9: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x26,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x24,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_or_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smax s5, s[2:3], s101 // GFX9: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smax_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smin s5, s[2:3], s101 // GFX9: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_sub s5, s[2:3], s101 // GFX9: s_atomic_sub s5, s[2:3], s101 ; encoding: 
[0x41,0x01,0x0c,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_sub_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_swap s5, s[2:3], s101 // GFX9: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_swap_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umax s5, s[2:3], s0 glc // GFX9: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umax_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umin s5, s[2:3], s101 // GFX9: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_umin s5, s[2:3], s101 ; 
encoding: [0x41,0x01,0x14,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_xor s5, s[2:3], s101 // GFX9: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_xor_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // s_buffer_atomic instructions @@ -452,162 +461,162 @@ s_atomic_xor_x2 s[10:11], s[2:3], s0 glc s_buffer_atomic_add s5, s[4:7], s101 // GFX9: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_add s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x0a,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_add s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_add s5, s[4:7], s0 glc 
; encoding: [0x42,0x01,0x09,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_and s101, s[4:7], s0 // GFX9: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 // GFX9: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x06,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc // GFX9: 
s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x86,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_dec s5, s[4:7], s0 // GFX9: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: 
error: instruction not supported on this GPU s_buffer_atomic_inc s101, s[4:7], s0 // GFX9: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xae,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xac,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_or s5, s[8:11], s0 // GFX9: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 // GFX9: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_smax s5, s[4:7], s101 // GFX9: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 // GFX9: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: 
error: instruction not supported on this GPU s_buffer_atomic_smin s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x12,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x10,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 // GFX9: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_sub s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_swap s5, s[4:7], s0 // GFX9: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// 
NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umax s5, s[4:7], s0 // GFX9: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umin s5, s[4:7], s0 // GFX9: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_xor s5, s[4:7], s0 // GFX9: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xf5,0x00,0x00,0x00,0x00] -// 
NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // Unsigned 20-bit offsets (VI+) diff --git a/llvm/test/MC/AMDGPU/smrd-err.s b/llvm/test/MC/AMDGPU/smrd-err.s index d7ef74901c6f0..68f2ac6570c90 100644 --- a/llvm/test/MC/AMDGPU/smrd-err.s +++ b/llvm/test/MC/AMDGPU/smrd-err.s @@ -1,15 +1,14 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=NOVI --implicit-check-not=error: %s s_load_dwordx4 s[100:103], s[2:3], s4 -// VI: error: not a valid operand +// NOVI: error: not a valid operand // SI: s_load_dwordx4 s[100:103], s[2:3], s4 - s_load_dwordx8 s[96:103], s[2:3], s4 -// VI: error: not a valid operand +// NOVI: error: not a valid operand // SI: s_load_dwordx8 s[96:103], s[2:3], s4 s_load_dwordx16 s[88:103], s[2:3], s4 -// VI: error: not a valid operand +// NOVI: error: not a valid operand // SI: s_load_dwordx16 s[88:103], s[2:3], s4 diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index 0ad3b0f20645e..30f01b2ced1c3 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -3,9 +3,9 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=CI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 
| FileCheck %s --check-prefix=NOSI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Offset Handling diff --git a/llvm/test/MC/AMDGPU/sop1-err.s b/llvm/test/MC/AMDGPU/sop1-err.s index 0225fa1778ead..6322f5b098c35 100644 --- a/llvm/test/MC/AMDGPU/sop1-err.s +++ b/llvm/test/MC/AMDGPU/sop1-err.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI --implicit-check-not=error: %s s_mov_b32 v1, s2 // GCN: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/sop1.s b/llvm/test/MC/AMDGPU/sop1.s index 76525b943cad1..dafbf650b6715 100644 --- a/llvm/test/MC/AMDGPU/sop1.s +++ b/llvm/test/MC/AMDGPU/sop1.s @@ -1,71 +1,84 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=GFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX89 --check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck 
--check-prefix=GCN --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=GFX89 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX89 --check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=NOVI --check-prefix=NOSICIVI --check-prefix=NOGFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefix=NOGFX89 %s - -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding 2>&1 %s | FileCheck --check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=NOVI --check-prefix=NOSICIVI --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 2>&1 %s | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s s_mov_b32 s1, s2 // SICI: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe] // GFX89: s_mov_b32 s1, s2 ; encoding: [0x02,0x00,0x81,0xbe] +// GFX10: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe] s_mov_b32 s1, 1 // SICI: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe] // GFX89: s_mov_b32 s1, 1 ; encoding: [0x81,0x00,0x81,0xbe] +// GFX10: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe] s_mov_b32 s1, 100 // SICI: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00] // GFX89: 
s_mov_b32 s1, 0x64 ; encoding: [0xff,0x00,0x81,0xbe,0x64,0x00,0x00,0x00] +// GFX10: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00] // Literal constant sign bit s_mov_b32 s1, 0x80000000 // SICI: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x03,0x81,0xbe,0x00,0x00,0x00,0x80] // GFX89: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x00,0x81,0xbe,0x00,0x00,0x00,0x80] +// GFX10: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x03,0x81,0xbe,0x00,0x00,0x00,0x80] // Negative 32-bit constant s_mov_b32 s0, 0xfe5163ab // SICI: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x03,0x80,0xbe,0xab,0x63,0x51,0xfe] // GFX89: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x00,0x80,0xbe,0xab,0x63,0x51,0xfe] +// GFX10: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x03,0x80,0xbe,0xab,0x63,0x51,0xfe] s_mov_b64 s[2:3], s[4:5] // SICI: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe] // GFX89: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x01,0x82,0xbe] +// GFX10: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe] s_mov_b64 null, s[4:5] // GFX10: s_mov_b64 null, s[4:5] ; encoding: [0x04,0x04,0xfd,0xbe] // NOSICIVI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
s_mov_b64 s[2:3], 0xffffffffffffffff // SICI: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe] // GFX89: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x01,0x82,0xbe] +// GFX10: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe] s_mov_b64 s[2:3], 0xffffffff // SICI: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x04,0x82,0xbe,0xff,0xff,0xff,0xff] // GFX89: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x01,0x82,0xbe,0xff,0xff,0xff,0xff] +// GFX10: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x04,0x82,0xbe,0xff,0xff,0xff,0xff] s_mov_b64 s[0:1], 0x80000000 // SICI: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x04,0x80,0xbe,0x00,0x00,0x00,0x80] // GFX89: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x01,0x80,0xbe,0x00,0x00,0x00,0x80] +// GFX10: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x04,0x80,0xbe,0x00,0x00,0x00,0x80] s_mov_b64 s[102:103], -1 // SICI: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe] // NOGFX89: error: not a valid operand +// GFX10: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe] s_cmov_b32 s1, 200 // SICI: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00] // GFX89: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x02,0x81,0xbe,0xc8,0x00,0x00,0x00] +// GFX10: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00] s_cmov_b32 s1, 1.0 // SICI: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe] // GFX89: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x02,0x81,0xbe] +// GFX10: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe] s_cmov_b32 s1, s2 // SICI: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe] // GFX89: s_cmov_b32 s1, s2 ; encoding: [0x02,0x02,0x81,0xbe] +// GFX10: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe] //s_cmov_b64 s[2:3], 1.0 //GCN-FIXME: s_cmov_b64 s[2:3], 1.0 ; encoding: [0xf2,0x05,0x82,0xb3] @@ -73,174 +86,217 @@ s_cmov_b32 s1, s2 s_cmov_b64 s[2:3], s[4:5] // SICI: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe] // GFX89: s_cmov_b64 s[2:3], s[4:5] 
; encoding: [0x04,0x03,0x82,0xbe] +// GFX10: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe] s_not_b32 s1, s2 // SICI: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe] // GFX89: s_not_b32 s1, s2 ; encoding: [0x02,0x04,0x81,0xbe] +// GFX10: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe] s_not_b64 s[2:3], s[4:5] // SICI: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe] // GFX89: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x05,0x82,0xbe] +// GFX10: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe] s_wqm_b32 s1, s2 // SICI: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe] // GFX89: s_wqm_b32 s1, s2 ; encoding: [0x02,0x06,0x81,0xbe] +// GFX10: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe] s_wqm_b64 s[2:3], s[4:5] // SICI: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe] // GFX89: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x07,0x82,0xbe] +// GFX10: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe] s_brev_b32 s1, s2 // SICI: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe] // GFX89: s_brev_b32 s1, s2 ; encoding: [0x02,0x08,0x81,0xbe] +// GFX10: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe] s_brev_b64 s[2:3], s[4:5] // SICI: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe] // GFX89: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x09,0x82,0xbe] +// GFX10: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe] s_bcnt0_i32_b32 s1, s2 // SICI: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe] // GFX89: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0a,0x81,0xbe] +// GFX10: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe] s_bcnt0_i32_b64 s1, s[2:3] // SICI: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe] // GFX89: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0b,0x81,0xbe] +// GFX10: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe] s_bcnt1_i32_b32 s1, s2 // SICI: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe] // GFX89: s_bcnt1_i32_b32 s1, s2 ; 
encoding: [0x02,0x0c,0x81,0xbe] +// GFX10: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe] s_bcnt1_i32_b64 s1, s[2:3] // SICI: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe] // GFX89: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0d,0x81,0xbe] +// GFX10: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe] s_ff0_i32_b32 s1, s2 // SICI: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe] // GFX89: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x0e,0x81,0xbe] +// GFX10: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe] s_ff0_i32_b64 s1, s[2:3] // SICI: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe] // GFX89: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0f,0x81,0xbe] +// GFX10: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe] s_ff1_i32_b32 s1, s2 // SICI: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe] // GFX89: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x10,0x81,0xbe] +// GFX10: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe] s_ff1_i32_b64 s1, s[2:3] // SICI: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe] // GFX89: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x11,0x81,0xbe] +// GFX10: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe] s_flbit_i32_b32 s1, s2 // SICI: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe] // GFX89: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x12,0x81,0xbe] +// GFX10: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe] s_flbit_i32_b64 s1, s[2:3] // SICI: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe] // GFX89: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x13,0x81,0xbe] +// GFX10: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe] s_flbit_i32 s1, s2 // SICI: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe] // GFX89: s_flbit_i32 s1, s2 ; encoding: [0x02,0x14,0x81,0xbe] +// GFX10: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe] s_flbit_i32_i64 s1, s[2:3] // SICI: s_flbit_i32_i64 s1, s[2:3] ; encoding: 
[0x02,0x18,0x81,0xbe] // GFX89: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x15,0x81,0xbe] +// GFX10: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe] s_sext_i32_i8 s1, s2 // SICI: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe] // GFX89: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x16,0x81,0xbe] +// GFX10: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe] s_sext_i32_i16 s1, s2 // SICI: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe] // GFX89: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe] +// GFX10: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe] s_bitset0_b32 s1, s2 // SICI: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe] // GFX89: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x18,0x81,0xbe] +// GFX10: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe] s_bitset0_b64 s[2:3], s4 // SICI: s_bitset0_b64 s[2:3], s4 ; encoding: [0x04,0x1c,0x82,0xbe] // GFX89: s_bitset0_b64 s[2:3], s4 ; encoding: [0x04,0x19,0x82,0xbe] +// GFX10: s_bitset0_b64 s[2:3], s4 ; encoding: [0x04,0x1c,0x82,0xbe] s_bitset1_b32 s1, s2 // SICI: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe] // GFX89: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe] +// GFX10: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe] s_bitset1_b64 s[2:3], s4 // SICI: s_bitset1_b64 s[2:3], s4 ; encoding: [0x04,0x1e,0x82,0xbe] // GFX89: s_bitset1_b64 s[2:3], s4 ; encoding: [0x04,0x1b,0x82,0xbe] +// GFX10: s_bitset1_b64 s[2:3], s4 ; encoding: [0x04,0x1e,0x82,0xbe] s_getpc_b64 s[2:3] // SICI: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe] // GFX89: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1c,0x82,0xbe] +// GFX10: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe] s_setpc_b64 s[4:5] // SICI: s_setpc_b64 s[4:5] ; encoding: [0x04,0x20,0x80,0xbe] // GFX89: s_setpc_b64 s[4:5] ; encoding: [0x04,0x1d,0x80,0xbe] +// GFX10: s_setpc_b64 s[4:5] ; encoding: [0x04,0x20,0x80,0xbe] s_swappc_b64 s[2:3], s[4:5] // SICI: s_swappc_b64 s[2:3], s[4:5] ; encoding: 
[0x04,0x21,0x82,0xbe] // GFX89: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe] +// GFX10: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe] s_rfe_b64 s[4:5] // SICI: s_rfe_b64 s[4:5] ; encoding: [0x04,0x22,0x80,0xbe] // GFX89: s_rfe_b64 s[4:5] ; encoding: [0x04,0x1f,0x80,0xbe] +// GFX10: s_rfe_b64 s[4:5] ; encoding: [0x04,0x22,0x80,0xbe] s_and_saveexec_b64 s[2:3], s[4:5] // SICI: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe] // GFX89: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe] +// GFX10: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe] s_or_saveexec_b64 s[2:3], s[4:5] // SICI: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe] // GFX89: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe] +// GFX10: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe] s_xor_saveexec_b64 s[2:3], s[4:5] // SICI: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe] // GFX89: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe] +// GFX10: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe] s_andn2_saveexec_b64 s[2:3], s[4:5] // SICI: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe] // GFX89: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x23,0x82,0xbe] +// GFX10: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe] s_orn2_saveexec_b64 s[2:3], s[4:5] // SICI: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe] // GFX89: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe] +// GFX10: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe] s_nand_saveexec_b64 s[2:3], s[4:5] // SICI: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe] // GFX89: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe] +// GFX10: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe] 
s_nor_saveexec_b64 s[2:3], s[4:5] // SICI: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe] // GFX89: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe] +// GFX10: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe] s_xnor_saveexec_b64 s[2:3], s[4:5] // SICI: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe] // GFX89: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe] +// GFX10: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe] s_quadmask_b32 s1, s2 // SICI: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe] // GFX89: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x28,0x81,0xbe] +// GFX10: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe] s_quadmask_b64 s[2:3], s[4:5] // SICI: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe] // GFX89: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe] +// GFX10: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe] s_movrels_b32 s1, s2 // SICI: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe] // GFX89: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2a,0x81,0xbe] +// GFX10: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe] s_movrels_b64 s[2:3], s[4:5] // SICI: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe] // GFX89: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe] +// GFX10: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe] s_movreld_b32 s1, s2 // SICI: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] // GFX89: s_movreld_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe] +// GFX10: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] s_movreld_b64 s[2:3], s[4:5] // SICI: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe] // GFX89: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe] +// GFX10: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe] s_cbranch_join s4 // SICI: s_cbranch_join s4 ; encoding: 
[0x04,0x32,0x80,0xbe] @@ -250,55 +306,69 @@ s_cbranch_join s4 s_cbranch_join 1 // NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_cbranch_join 100 // NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_abs_i32 s1, s2 // SICI: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe] // GFX89: s_abs_i32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] +// GFX10: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe] s_set_gpr_idx_idx s0 // GFX89: s_set_gpr_idx_idx s0 ; encoding: [0x00,0x32,0x80,0xbe] // NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_andn1_saveexec_b64 s[100:101], s[2:3] // GFX9: s_andn1_saveexec_b64 s[100:101], s[2:3] ; encoding: [0x02,0x33,0xe4,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[100:101], s[2:3] ; encoding: [0x02,0x37,0xe4,0xbe] s_andn1_saveexec_b64 s[10:11], s[4:5] // GFX9: s_andn1_saveexec_b64 s[10:11], s[4:5] ; encoding: [0x04,0x33,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[10:11], s[4:5] ; encoding: [0x04,0x37,0x8a,0xbe] s_andn1_saveexec_b64 s[10:11], -1 // GFX9: s_andn1_saveexec_b64 s[10:11], -1 ; encoding: [0xc1,0x33,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[10:11], -1 ; encoding: [0xc1,0x37,0x8a,0xbe] s_andn1_saveexec_b64 s[10:11], 0xaf123456 // GFX9: s_andn1_saveexec_b64 s[10:11], 0xaf123456 ; encoding: [0xff,0x33,0x8a,0xbe,0x56,0x34,0x12,0xaf] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[10:11], 0xaf123456 ; encoding: [0xff,0x37,0x8a,0xbe,0x56,0x34,0x12,0xaf] s_andn1_wrexec_b64 s[10:11], s[2:3] // GFX9: s_andn1_wrexec_b64 s[10:11], s[2:3] ; encoding: 
[0x02,0x35,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_wrexec_b64 s[10:11], s[2:3] ; encoding: [0x02,0x39,0x8a,0xbe] s_andn2_wrexec_b64 s[12:13], s[2:3] // GFX9: s_andn2_wrexec_b64 s[12:13], s[2:3] ; encoding: [0x02,0x36,0x8c,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn2_wrexec_b64 s[12:13], s[2:3] ; encoding: [0x02,0x3a,0x8c,0xbe] s_orn1_saveexec_b64 s[10:11], 0 // GFX9: s_orn1_saveexec_b64 s[10:11], 0 ; encoding: [0x80,0x34,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_orn1_saveexec_b64 s[10:11], 0 ; encoding: [0x80,0x38,0x8a,0xbe] s_bitreplicate_b64_b32 s[10:11], s101 // GFX9: s_bitreplicate_b64_b32 s[10:11], s101 ; encoding: [0x65,0x37,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_bitreplicate_b64_b32 s[10:11], s101 ; encoding: [0x65,0x3b,0x8a,0xbe] s_bitreplicate_b64_b32 s[10:11], -1 // GFX9: s_bitreplicate_b64_b32 s[10:11], -1 ; encoding: [0xc1,0x37,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_bitreplicate_b64_b32 s[10:11], -1 ; encoding: [0xc1,0x3b,0x8a,0xbe] s_bitreplicate_b64_b32 s[10:11], 0x3f717273 // GFX9: s_bitreplicate_b64_b32 s[10:11], 0x3f717273 ; encoding: [0xff,0x37,0x8a,0xbe,0x73,0x72,0x71,0x3f] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_bitreplicate_b64_b32 s[10:11], 0x3f717273 ; encoding: [0xff,0x3b,0x8a,0xbe,0x73,0x72,0x71,0x3f] diff --git a/llvm/test/MC/AMDGPU/sop2-err.s b/llvm/test/MC/AMDGPU/sop2-err.s index 128a3d7b33ceb..f6a6054ebdccc 100644 --- a/llvm/test/MC/AMDGPU/sop2-err.s +++ b/llvm/test/MC/AMDGPU/sop2-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s s_cbranch_g_fork 100, s[6:7] // GCN: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/sop2.s 
b/llvm/test/MC/AMDGPU/sop2.s index c1fe19a787d01..89f41a7b3d512 100644 --- a/llvm/test/MC/AMDGPU/sop2.s +++ b/llvm/test/MC/AMDGPU/sop2.s @@ -5,13 +5,12 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX89 --check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefix=NOGFX89 %s - -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding 2>&1 %s | FileCheck --check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 2>&1 %s | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s s_add_u32 s1, s2, s3 // GCN: s_add_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x80] @@ -52,134 +51,167 @@ 
s_cselect_b64 s[2:3], s[4:5], s[6:7] s_and_b32 s2, s4, s6 // SICI: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87] // GFX89: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x86] +// GFX10: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87] s_and_b32 s2, 1234, 1234 // SICI: s_and_b32 s2, 0x4d2, 0x4d2 ; encoding: [0xff,0xff,0x02,0x87,0xd2,0x04,0x00,0x00] // GFX89: s_and_b32 s2, 0x4d2, 0x4d2 ; encoding: [0xff,0xff,0x02,0x86,0xd2,0x04,0x00,0x00] +// GFX10: s_and_b32 s2, 0x4d2, 0x4d2 ; encoding: [0xff,0xff,0x02,0x87,0xd2,0x04,0x00,0x00] s_and_b32 s2, 0xFFFF0000, -65536 // SICI: s_and_b32 s2, 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x02,0x87,0x00,0x00,0xff,0xff] // GFX89: s_and_b32 s2, 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x02,0x86,0x00,0x00,0xff,0xff] +// GFX10: s_and_b32 s2, 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x02,0x87,0x00,0x00,0xff,0xff] s_and_b64 null, s[4:5], s[6:7] // GFX10: s_and_b64 null, s[4:5], s[6:7] ; encoding: [0x04,0x06,0xfd,0x87] // NOSICIVI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
s_and_b64 s[2:3], s[4:5], s[6:7] // SICI: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] // GFX89: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x86] +// GFX10: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] s_or_b32 s2, s4, s6 // SICI: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88] // GFX89: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87] +// GFX10: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88] s_or_b64 s[2:3], s[4:5], s[6:7] // SICI: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88] // GFX89: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] +// GFX10: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88] s_xor_b32 s2, s4, s6 // SICI: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89] // GFX89: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88] +// GFX10: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89] s_xor_b64 s[2:3], s[4:5], s[6:7] // SICI: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89] // GFX89: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88] +// GFX10: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89] s_andn2_b32 s2, s4, s6 // SICI: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a] // GFX89: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89] +// GFX10: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a] s_andn2_b64 s[2:3], s[4:5], s[6:7] // SICI: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a] // GFX89: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89] +// GFX10: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a] s_orn2_b32 s2, s4, s6 // SICI: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b] // GFX89: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a] +// GFX10: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b] s_orn2_b64 s[2:3], s[4:5], s[6:7] // SICI: s_orn2_b64 s[2:3], 
s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b] // GFX89: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a] +// GFX10: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b] s_nand_b32 s2, s4, s6 // SICI: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c] // GFX89: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b] +// GFX10: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c] s_nand_b64 s[2:3], s[4:5], s[6:7] // SICI: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c] // GFX89: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b] +// GFX10: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c] s_nor_b32 s2, s4, s6 // SICI: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d] // GFX89: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c] +// GFX10: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d] s_nor_b64 s[2:3], s[4:5], s[6:7] // SICI: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d] // GFX89: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c] +// GFX10: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d] s_xnor_b32 s2, s4, s6 // SICI: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e] // GFX89: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d] +// GFX10: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e] s_xnor_b64 s[2:3], s[4:5], s[6:7] // SICI: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e] // GFX89: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d] +// GFX10: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e] s_lshl_b32 s2, s4, s6 // SICI: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f] // GFX89: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e] +// GFX10: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f] s_lshl_b64 s[2:3], s[4:5], s6 // SICI: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f] // GFX89: 
s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8e] +// GFX10: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f] s_lshr_b32 s2, s4, s6 // SICI: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90] // GFX89: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f] +// GFX10: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90] s_lshr_b64 s[2:3], s[4:5], s6 // SICI: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90] // GFX89: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f] +// GFX10: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90] s_ashr_i32 s2, s4, s6 // SICI: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91] // GFX89: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90] +// GFX10: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91] s_ashr_i64 s[2:3], s[4:5], s6 // SICI: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91] // GFX89: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90] +// GFX10: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91] s_ashr_i64 s[2:3], -65536, 0xFFFF0000 // SICI: s_ashr_i64 s[2:3], 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x82,0x91,0x00,0x00,0xff,0xff] // GFX89: s_ashr_i64 s[2:3], 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x82,0x90,0x00,0x00,0xff,0xff] +// GFX10: s_ashr_i64 s[2:3], 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x82,0x91,0x00,0x00,0xff,0xff] s_bfm_b32 s2, s4, s6 // SICI: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92] // GFX89: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91] +// GFX10: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92] s_bfm_b64 s[2:3], s4, s6 // SICI: s_bfm_b64 s[2:3], s4, s6 ; encoding: [0x04,0x06,0x82,0x92] // GFX89: s_bfm_b64 s[2:3], s4, s6 ; encoding: [0x04,0x06,0x82,0x91] +// GFX10: s_bfm_b64 s[2:3], s4, s6 ; encoding: [0x04,0x06,0x82,0x92] s_mul_i32 s2, s4, s6 // SICI: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93] // GFX89: s_mul_i32 s2, s4, 
s6 ; encoding: [0x04,0x06,0x02,0x92] +// GFX10: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93] s_bfe_u32 s2, s4, s6 // SICI: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93] // GFX89: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x92] +// GFX10: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93] s_bfe_i32 s2, s4, s6 // SICI: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94] // GFX89: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93] +// GFX10: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94] s_bfe_u64 s[2:3], s[4:5], s6 // SICI: s_bfe_u64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x94] // GFX89: s_bfe_u64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x93] +// GFX10: s_bfe_u64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x94] s_bfe_i64 s[2:3], s[4:5], s6 // SICI: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95] // GFX89: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x94] +// GFX10: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95] s_cbranch_g_fork s[4:5], s[6:7] // SICI: s_cbranch_g_fork s[4:5], s[6:7] ; encoding: [0x04,0x06,0x80,0x95] @@ -199,79 +231,99 @@ s_cbranch_g_fork s[6:7], 2 s_absdiff_i32 s2, s4, s6 // SICI: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96] // GFX89: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x95] +// GFX10: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96] s_add_u32 s101, s102, s103 // SICI: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80] // NOGFX89: error: not a valid operand +// GFX10: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80] s_lshl1_add_u32 s5, s1, s2 // GFX9: s_lshl1_add_u32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0x97] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0x97] s_lshl1_add_u32 s5, -1, s2 // GFX9: s_lshl1_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0x97] -// NOSICIVI: error +// 
NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0x97] s_lshl1_add_u32 s5, s1, 0 // GFX9: s_lshl1_add_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x97] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x97] s_lshl1_add_u32 s5, s1, 0x3f717273 // GFX9: s_lshl1_add_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x05,0x97,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x05,0x97,0x73,0x72,0x71,0x3f] s_lshl2_add_u32 s101, s1, s2 // GFX9: s_lshl2_add_u32 s101, s1, s2 ; encoding: [0x01,0x02,0xe5,0x97] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl2_add_u32 s101, s1, s2 ; encoding: [0x01,0x02,0xe5,0x97] s_lshl2_add_u32 s5, 0xaf123456, s2 // GFX9: s_lshl2_add_u32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0x97,0x56,0x34,0x12,0xaf] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl2_add_u32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0x97,0x56,0x34,0x12,0xaf] s_lshl3_add_u32 s5, 0x3f717273, s2 // GFX9: s_lshl3_add_u32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0x98,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl3_add_u32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0x98,0x73,0x72,0x71,0x3f] s_lshl3_add_u32 s5, s1, s101 // GFX9: s_lshl3_add_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x98] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl3_add_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x98] s_lshl4_add_u32 s5, s1, 0xaf123456 // GFX9: s_lshl4_add_u32 s5, s1, 0xaf123456 ; encoding: [0x01,0xff,0x85,0x98,0x56,0x34,0x12,0xaf] -// NOSICIVI: error +// NOSICIVI: error: 
instruction not supported on this GPU +// GFX10: s_lshl4_add_u32 s5, s1, 0xaf123456 ; encoding: [0x01,0xff,0x85,0x98,0x56,0x34,0x12,0xaf] s_lshl4_add_u32 s5, -1, s2 // GFX9: s_lshl4_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0x98] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl4_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0x98] s_mul_hi_i32 s5, s101, s2 // GFX9: s_mul_hi_i32 s5, s101, s2 ; encoding: [0x65,0x02,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, s101, s2 ; encoding: [0x65,0x02,0x05,0x9b] s_mul_hi_i32 s5, 0, s2 // GFX9: s_mul_hi_i32 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0x9b] s_mul_hi_i32 s5, 0x3f717273, s2 // GFX9: s_mul_hi_i32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x85,0x96,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0x9b,0x73,0x72,0x71,0x3f] s_mul_hi_i32 s5, s1, s101 // GFX9: s_mul_hi_i32 s5, s1, s101 ; encoding: [0x01,0x65,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x9b] s_mul_hi_i32 s5, s1, 0 // GFX9: s_mul_hi_i32 s5, s1, 0 ; encoding: [0x01,0x80,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x9b] s_mul_hi_u32 s5, s1, 0x3f717273 // GFX9: s_mul_hi_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x05,0x96,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x85,0x9a,0x73,0x72,0x71,0x3f] s_mul_hi_u32 s5, s1, s101 // GFX9: 
s_mul_hi_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x85,0x9a] s_mul_hi_u32 s5, s1, 0 // GFX9: s_mul_hi_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x85,0x9a] diff --git a/llvm/test/MC/AMDGPU/sopc-err.s b/llvm/test/MC/AMDGPU/sopc-err.s index 88788862f1d75..5f2021a5aaf5d 100644 --- a/llvm/test/MC/AMDGPU/sopc-err.s +++ b/llvm/test/MC/AMDGPU/sopc-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI --implicit-check-not=error: %s s_set_gpr_idx_on s0, s1 // VI: error: expected absolute expression diff --git a/llvm/test/MC/AMDGPU/sopc.s b/llvm/test/MC/AMDGPU/sopc.s index 38b385aa6a360..3ef217798a2ef 100644 --- a/llvm/test/MC/AMDGPU/sopc.s +++ b/llvm/test/MC/AMDGPU/sopc.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // SOPC Instructions @@ -76,41 +76,51 @@ s_cmp_lg_u64 s[0:1], s[2:3] gpr_idx = 1 
s_set_gpr_idx_on s0, gpr_idx // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction gpr_idx_mode = 10 s_set_gpr_idx_on s0, gpr_idx_mode + 5 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, 0 // VI: s_set_gpr_idx_on s0, gpr_idx() ; encoding: [0x00,0x00,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx() // VI: s_set_gpr_idx_on s0, gpr_idx() ; encoding: [0x00,0x00,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: unknown token in expression +// GFX10-ERR: error: unknown token in expression s_set_gpr_idx_on s0, 1 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx(SRC0) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, 3 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1) ; encoding: [0x00,0x03,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx(SRC1,SRC0) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1) ; encoding: [0x00,0x03,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: expected ')' in parentheses expression +// GFX10-ERR: error: expected ')' in parentheses expression s_set_gpr_idx_on s0, 15 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// 
NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx(SRC0,DST,SRC2,SRC1) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: expected ')' in parentheses expression +// GFX10-ERR: error: expected ')' in parentheses expression diff --git a/llvm/test/MC/AMDGPU/sopk-err.s b/llvm/test/MC/AMDGPU/sopk-err.s index 7d1bd8110b5d8..2311c72b52b24 100644 --- a/llvm/test/MC/AMDGPU/sopk-err.s +++ b/llvm/test/MC/AMDGPU/sopk-err.s @@ -1,9 +1,14 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GFX9-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck -check-prefixes=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefixes=SI,SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefixes=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s + +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefixes=GCN,SICIVI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN,SICIVI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn 
-mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN,SICIVI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX10-ERR --implicit-check-not=error: %s s_setreg_b32 0x1f803, s2 // GCN: error: invalid immediate: only 16-bit values are legal @@ -42,61 +47,55 @@ s_getreg_b32 s2, hwreg(3,32,32) // GCN: error: invalid bit offset: only 5-bit values are legal s_cbranch_i_fork s[2:3], 0x6 -// GFX10: error: instruction not supported on this GPU +// SICI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x82,0xb8] +// GFX10-ERR: error: instruction not supported on this GPU +// GFX9: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x02,0xb8] +// VI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x02,0xb8] s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x82,0xb8] // GFX10: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x82,0xb8] s_getreg_b32 s2, hwreg(HW_REG_TBA_LO) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_LO) ; encoding: [0x10,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_TBA_HI) -// SI-ERR: specified hardware register 
is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_HI) ; encoding: [0x11,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_TMA_LO) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_LO) ; encoding: [0x12,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_TMA_HI) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_HI) ; encoding: [0x13,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO) ; encoding: [0x14,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI) -// SI-ERR: specified hardware register is not supported on this GPU -// 
VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI) ; encoding: [0x15,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) ; encoding: [0x16,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER) ; encoding: [0x19,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_cmpk_le_u32 s2, -1 // GCN: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s index ebadd76cee2f3..e128df94c611f 100644 --- a/llvm/test/MC/AMDGPU/sopk.s +++ b/llvm/test/MC/AMDGPU/sopk.s @@ -4,10 +4,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI9 --check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s -// RUN: not llvm-mc 
-arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOSI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOSI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Instructions diff --git a/llvm/test/MC/AMDGPU/sopp-err.s b/llvm/test/MC/AMDGPU/sopp-err.s index 2a78940655fc7..f3181de9438fa 100644 --- a/llvm/test/MC/AMDGPU/sopp-err.s +++ b/llvm/test/MC/AMDGPU/sopp-err.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=SICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=SICIVI --implicit-check-not=error: 
%s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // sendmsg @@ -84,15 +84,22 @@ s_sendmsg sendmsg(MSG_GS_DONE, 0, 0) s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) // SICIVI: error: invalid message id +// SICI: error: invalid message id s_sendmsg sendmsg(MSG_GS_ALLOC_REQ, 0) // SICIVI: error: invalid message id +// SICI: error: invalid message id +// GFX10: error: message does not support operations s_sendmsg sendmsg(-1) // SICIVI: error: invalid message id +// SICI: error: invalid message id +// GFX10: error: invalid message id s_sendmsg sendmsg(16) // SICIVI: error: invalid message id +// SICI: error: invalid message id +// GFX10: error: invalid message id s_sendmsg sendmsg(MSG_SYSMSG) // GCN: error: missing message operation @@ -112,6 +119,7 @@ s_sendmsg sendmsg(MSG_SYSMSG, 5) s_waitcnt lgkmcnt(16) // SICIVI: error: too large value for lgkmcnt +// SICI: error: too large value for lgkmcnt s_waitcnt lgkmcnt(64) // GCN: error: too large value for lgkmcnt @@ -121,9 +129,12 @@ s_waitcnt expcnt(8) s_waitcnt vmcnt(16) // SICIVI: error: too large value for vmcnt +// SICI: error: too large value for vmcnt s_waitcnt vmcnt(64) // GFX10: error: too large value for vmcnt +// SICI: error: too large value for vmcnt +// SICIVI: error: too large value for vmcnt s_waitcnt vmcnt(0xFFFFFFFFFFFF0000) // GCN: error: too large value for vmcnt diff --git a/llvm/test/MC/AMDGPU/sopp.s b/llvm/test/MC/AMDGPU/sopp.s index 4be9323741157..63783f61c6bf1 100644 --- a/llvm/test/MC/AMDGPU/sopp.s +++ b/llvm/test/MC/AMDGPU/sopp.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI 
--implicit-check-not=error: // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI %s //===----------------------------------------------------------------------===// @@ -357,23 +357,23 @@ s_ttracedata s_set_gpr_idx_off // VI: s_set_gpr_idx_off ; encoding: [0x00,0x00,0x9c,0xbf] -// NOSICI: error: +// NOSICI: error: instruction not supported on this GPU s_set_gpr_idx_mode 0 // VI: s_set_gpr_idx_mode gpr_idx() ; encoding: [0x00,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction s_set_gpr_idx_mode gpr_idx() // VI: s_set_gpr_idx_mode gpr_idx() ; encoding: [0x00,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: unknown token in expression s_set_gpr_idx_mode 15 // VI: s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x0f,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction s_set_gpr_idx_mode gpr_idx(SRC2,SRC1,SRC0,DST) // VI: s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x0f,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: expected ')' in parentheses expression s_endpgm_saved // VI: s_endpgm_saved ; encoding: [0x00,0x00,0x9b,0xbf] diff --git a/llvm/test/MC/AMDGPU/trap.s b/llvm/test/MC/AMDGPU/trap.s index 7b527ba3072e9..5d23c1f30d6ed 100644 --- a/llvm/test/MC/AMDGPU/trap.s +++ b/llvm/test/MC/AMDGPU/trap.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=VI // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s 
--check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Trap Handler related - 32 bit registers diff --git a/llvm/test/MC/AMDGPU/vintrp-err.s b/llvm/test/MC/AMDGPU/vintrp-err.s index 08ab2797ce535..00491e0fe9877 100644 --- a/llvm/test/MC/AMDGPU/vintrp-err.s +++ b/llvm/test/MC/AMDGPU/vintrp-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI --implicit-check-not=error: %s v_interp_p1_f32 v0, v1, attr64.w // GCN: :25: error: out of bounds attr diff --git a/llvm/test/MC/AMDGPU/vop-err.s b/llvm/test/MC/AMDGPU/vop-err.s index 13388263b20e9..c66b5b90e27a4 100644 --- a/llvm/test/MC/AMDGPU/vop-err.s +++ b/llvm/test/MC/AMDGPU/vop-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s // GENERIC LIMITATIONS ON VOP FORMATS: 
CONSTANT BUS RESTRICTIONS diff --git a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s index 61bf5f6617595..9345632855379 100644 --- a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s +++ b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN,VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN,CI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN,VI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefixes=GCN,CI --implicit-check-not=error: %s v_swap_b32 v1, 1 // GCN: :16: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/vop1-gfx9.s b/llvm/test/MC/AMDGPU/vop1-gfx9.s index 96e328c433bab..9f74e3a71a6c4 100644 --- a/llvm/test/MC/AMDGPU/vop1-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop1-gfx9.s @@ -1,7 +1,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s v_swap_b32 v1, v2 
// GFX9: v_swap_b32 v1, v2 ; encoding: [0x02,0xa3,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index e9d288418c42a..12a033c92992c 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: // Force 32-bit encoding diff --git a/llvm/test/MC/AMDGPU/vop2-err.s b/llvm/test/MC/AMDGPU/vop2-err.s index c446f1f01ec15..526483b1f5c32 100644 --- a/llvm/test/MC/AMDGPU/vop2-err.s +++ b/llvm/test/MC/AMDGPU/vop2-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck 
--implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Generic checks diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index 1505c8cfa44d9..b2893154dd6dd 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Generic Checks for floating-point instructions (These have modifiers). 
diff --git a/llvm/test/MC/AMDGPU/vop3-convert.s b/llvm/test/MC/AMDGPU/vop3-convert.s index 0bdf86cb55862..a654af5e47521 100644 --- a/llvm/test/MC/AMDGPU/vop3-convert.s +++ b/llvm/test/MC/AMDGPU/vop3-convert.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: v_mov_b32 [v1], [v2] // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s index 9fbce05155430..01cbb130f95c6 100644 --- a/llvm/test/MC/AMDGPU/vop3-errs.s +++ b/llvm/test/MC/AMDGPU/vop3-errs.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX89 
--check-prefix=GCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: v_add_f32_e64 v0, v1 // GCN: error: too few operands for instruction diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index e11271ab1eedc..c98fc47093f83 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -1,507 +1,648 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=NOSI,NOSICI,NOGCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefixes=NOCI,NOSICI,NOGCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=NOVI,NOGCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 --implicit-check-not=error: %s 
v_lshl_add_u32 v1, v2, v3, v4 // GFX9: v_lshl_add_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfd,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_add_lshl_u32 v1, v2, v3, v4 // GFX9: v_add_lshl_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfe,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_add3_u32 v1, v2, v3, v4 // GFX9: v_add3_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xff,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_lshl_or_b32 v1, v2, v3, v4 // GFX9: v_lshl_or_b32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0x00,0xd2,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_and_or_b32 v1, v2, v3, v4 // GFX9: v_and_or_b32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0x01,0xd2,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_or3_b32 v1, v2, v3, v4 // GFX9: v_or3_b32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0x02,0xd2,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_pack_b32_f16 v1, v2, v3 // GFX9: v_pack_b32_f16 v1, v2, v3 ; encoding: [0x01,0x00,0xa0,0xd2,0x02,0x07,0x02,0x00] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0xa0,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. 
v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] ; encoding: [0x05,0x10,0xa0,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] ; encoding: [0x05,0x40,0xa0,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_xad_u32 v1, v2, v3, v4 // GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_min3_f16 v1, v2, v3, v4 // GFX9: v_min3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf4,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_min3_i16 v1, v2, v3, v4 // GFX9: v_min3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf5,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_min3_u16 v1, v2, v3, v4 // GFX9: v_min3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf6,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_max3_f16 v1, v2, v3, v4 // GFX9: v_max3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf7,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v1, v2, v3, v4 // GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_u16 v1, v2, v3, v4 // GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_med3_f16 v1, v2, v3, v4 // GFX9: v_med3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfa,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_med3_i16 v1, v2, v3, v4 // GFX9: v_med3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfb,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_med3_u16 v1, v2, v3, v4 // GFX9: v_med3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfc,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 // GFX9: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_i32_i16 v5, v1, v2, v3 // GFX9: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf2,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf2,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, -v1, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x20] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, -v2 // GFX9: v_cvt_pknorm_i16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x40] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, -v1, -v2 // GFX9: v_cvt_pknorm_i16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x60] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, |v1|, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, |v1|, v2 ; encoding: [0x05,0x01,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, |v2| // GFX9: v_cvt_pknorm_i16_f16 v5, v1, |v2| ; encoding: [0x05,0x02,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[0,0,0] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. 
v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_u16_f16 v5, -v1, -v2 // GFX9: v_cvt_pknorm_u16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x9a,0xd2,0x01,0x05,0x02,0x60] +// NOGCN: error: not a valid operand. v_cvt_pknorm_u16_f16 v5, |v1|, |v2| // GFX9: v_cvt_pknorm_u16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x9a,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9a,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_add_i16 v5, v1, v2 // GFX9: v_add_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9e,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: instruction not supported on this GPU v_add_i16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_add_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9e,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_sub_i16 v5, v1, v2 // GFX9: v_sub_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9f,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: instruction not supported on this GPU v_sub_i16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_sub_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9f,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. 
v_sub_i16 v5, v1, v2 clamp // GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: invalid operand for instruction v_fma_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, -v2, v3 // GFX9: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x44] +// NOSICI: error: not a valid operand. v_fma_f16 v5, v1, v2, |v3| // GFX9: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_fma_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_fma_legacy_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_fma_legacy_f16 v5, -v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x24] +// NOGCN: error: not a valid operand. 
v_fma_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_fma_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_fma_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_div_fixup_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, 0.5, v3 // GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, 0.5 // GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x24] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, |v1|, v2, v3 // GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, v1, 0.5, v3 // GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, v1, v2, 0.5 // GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] +// NOGCN: error: not a valid operand. v_div_fixup_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_div_fixup_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_mad_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, 0.5, v3 // GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, 0.5 // GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, -v3 // GFX9: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x84] +// NOSICI: error: not a valid operand. v_mad_f16 v5, v1, v2, |v3| // GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, -1, v3 // GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid literal operand v_mad_i16 v5, v1, v2, v3 clamp // GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_mad_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, 0.5, v3 // GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, v2, 0.5 // GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, -v2, v3 // GFX9: v_mad_legacy_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] +// NOGCN: error: not a valid operand. v_mad_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_mad_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_mad_legacy_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16 v5, v1, -1, v3 // GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16 v5, v1, v2, -4.0 clamp -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: invalid operand for instruction v_mad_legacy_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] +// NOGCN: error: 
instruction not supported on this GPU v_mad_legacy_u16 v5, v1, -1, v3 // GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_u16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_u16 v5, v1, v2, -4.0 clamp -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: invalid operand for instruction v_mad_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, -1, v3 // GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid literal operand v_mad_u16 v5, v1, v2, v3 clamp // GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x44] +// NOSICI: error: not a valid operand. v_interp_p2_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: error: not a valid operand. v_interp_p2_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x77,0xd2,0xc0,0x04,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x44] +// NOGCN: error: not a valid operand. v_interp_p2_legacy_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x76,0xd2,0x00,0x04,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_interp_p2_legacy_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x76,0xd2,0xc0,0x04,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_cvt_norm_i16_f16_e64 v5, -v1 // GFX9: v_cvt_norm_i16_f16_e64 v5, -v1 ; encoding: [0x05,0x00,0x8d,0xd1,0x01,0x01,0x00,0x20] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. v_cvt_norm_i16_f16_e64 v5, |v1| // GFX9: v_cvt_norm_i16_f16_e64 v5, |v1| ; encoding: [0x05,0x01,0x8d,0xd1,0x01,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. v_cvt_norm_u16_f16_e64 v5, -v1 // GFX9: v_cvt_norm_u16_f16_e64 v5, -v1 ; encoding: [0x05,0x00,0x8e,0xd1,0x01,0x01,0x00,0x20] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. v_cvt_norm_u16_f16_e64 v5, |v1| // GFX9: v_cvt_norm_u16_f16_e64 v5, |v1| ; encoding: [0x05,0x01,0x8e,0xd1,0x01,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. 
v_sat_pk_u8_i16_e64 v5, -1 // GFX9: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0x8f,0xd1,0xc1,0x00,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_sat_pk_u8_i16_e64 v5, v255 // GFX9: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0x8f,0xd1,0xff,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_screen_partition_4se_b32_e64 v5, v1 // GXF9: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU +// GFX9: v_screen_partition_4se_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] v_screen_partition_4se_b32_e64 v5, -1 // GXF9: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU +// GFX9: v_screen_partition_4se_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] v_add_u32 v84, v13, s31 clamp // GFX9: v_add_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_sub_u32 v84, v13, s31 clamp // GFX9: v_sub_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x35,0xd1,0x0d,0x3f,0x00,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_subrev_u32 v84, v13, s31 clamp // GFX9: v_subrev_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x36,0xd1,0x0d,0x3f,0x00,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_addc_co_u32 v84, s[4:5], v13, v31, vcc clamp // GFX9: v_addc_co_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_subb_co_u32 v84, s[2:3], v13, v31, vcc clamp // GFX9: v_subb_co_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: 
[0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_subbrev_co_u32 v84, vcc, v13, v31, s[6:7] clamp // GFX9: v_subbrev_co_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_add_co_u32 v84, s[4:5], v13, v31 clamp // GFX9: v_add_co_u32_e64 v84, s[4:5], v13, v31 clamp ; encoding: [0x54,0x84,0x19,0xd1,0x0d,0x3f,0x02,0x00] -// NOVI: error: +// NOSICI: error: integer clamping is not supported on this GPU +// NOVI: error: invalid operand for instruction v_sub_co_u32 v84, s[2:3], v13, v31 clamp // GFX9: v_sub_co_u32_e64 v84, s[2:3], v13, v31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x02,0x00] -// NOVI: error: +// NOSICI: error: integer clamping is not supported on this GPU +// NOVI: error: invalid operand for instruction v_subrev_co_u32 v84, vcc, v13, v31 clamp // GFX9: v_subrev_co_u32_e64 v84, vcc, v13, v31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x02,0x00] -// NOVI: error: +// NOSICI: error: integer clamping is not supported on this GPU +// NOVI: error: invalid operand for instruction v_addc_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_addc_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x38] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_subb_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_subb_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x3a] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_subbrev_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_subbrev_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x3c] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_add_co_u32 v84, vcc, v13, v31 // GFX9: v_add_co_u32_e32 v84, vcc, v13, v31 ; encoding: 
[0x0d,0x3f,0xa8,0x32] @@ -517,97 +658,97 @@ v_subrev_co_u32 v84, vcc, v13, v31 v_add_i32 v1, v2, v3 // GFX9: v_add_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9c,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_add_i32 v1, v2, v3 clamp // GFX9: v_add_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9c,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction v_sub_i32 v1, v2, v3 // GFX9: v_sub_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9d,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_sub_i32 v1, v2, v3 clamp // GFX9: v_sub_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9d,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction //===----------------------------------------------------------------------===// // Validate register size checks (bug 37943) //===----------------------------------------------------------------------===// -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], s0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], s[0:3], v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v[0:2], v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid 
operand for instruction v_add_f64 v[0:1], v[0:3], v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v[0:1], s0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, s[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, v0, s[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, v0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, s[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, v0, s[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, v0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, s[0:1], v0 -// NOVI: error: 
invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v0, s[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v0, v[0:1] diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s index 99265352f0cec..43223108163a0 100644 --- a/llvm/test/MC/AMDGPU/vop3-literal.s +++ b/llvm/test/MC/AMDGPU/vop3-literal.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9-ERR --implicit-check-not=error: %s v_bfe_u32 v0, 0x3039, v1, s1 // GFX10: v_bfe_u32 v0, 0x3039, v1, s1 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00] @@ -44,12 +44,12 @@ v_bfe_u32 v0, 0x3039, 0x12345, v2 // GFX9-ERR: error: invalid literal 
operand v_bfe_u32 v0, s1, 0x3039, s1 -// GFX10-ERR: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00] // GFX9-ERR: error: invalid literal operand +// GFX10: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00] v_bfe_u32 v0, s1, 0x3039, s2 -// GFX10: error: invalid operand (violates constant bus restrictions) // GFX9-ERR: error: invalid literal operand +// GFX10-ERR: error: invalid operand (violates constant bus restrictions) v_bfm_b32_e64 v0, 0x3039, s1 // GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] @@ -197,12 +197,15 @@ v_min3_i16 v5, 0x5678, 0x5678, 0x5679 v_add_nc_u16 v5, 0xfe0b, v2 // GFX10: v_add_nc_u16_e64 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_add_nc_u16 v5, v1, 0x1234 // GFX10: v_add_nc_u16_e64 v5, v1, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_add_nc_u16 v5, 0x1234, 0x1234 // GFX10: v_add_nc_u16_e64 v5, 0x1234, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfe,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_ashrrev_i16_e64 v5, 0x3456, v2 // GFX10: v_ashrrev_i16_e64 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] @@ -254,6 +257,7 @@ v_cmp_f_i32_e64 s[10:11], 0xaf123456, 0xaf123456 v_cmp_f_i32_e64 s[10:11], 0xaf123456, 0xaf123455 // GFX10-ERR: error: invalid literal operand +// GFX9-ERR: error: invalid literal operand v_cmp_f_u64_e64 s[10:11], 0xaf123456, v[2:3] // GFX10: v_cmp_f_u64_e64 s[10:11], 0xaf123456, v[2:3] ; encoding: [0x0a,0x00,0xe0,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] @@ -269,33 +273,43 @@ v_cmp_f_u64_e64 s[10:11], 0x3f717273, 0x3f717273 v_cmpx_class_f32_e64 0xaf123456, v2 // GFX10: 
v_cmpx_class_f32_e64 0xaf123456, v2 ; encoding: [0x00,0x00,0x98,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_class_f32_e64 v1, 0xaf123456 // GFX10: v_cmpx_class_f32_e64 v1, 0xaf123456 ; encoding: [0x00,0x00,0x98,0xd4,0x01,0xff,0x01,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_class_f32_e64 0xaf123456, 0xaf123456 // GFX10: v_cmpx_class_f32_e64 0xaf123456, 0xaf123456 ; encoding: [0x00,0x00,0x98,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_class_f32_e64 0xaf123456, 0xaf123455 // GFX10-ERR: error: invalid literal operand +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_lt_i16_e64 v1, 0x3456 // GFX10: v_cmpx_lt_i16_e64 v1, 0x3456 ; encoding: [0x00,0x00,0x99,0xd4,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_lt_i16_e64 0x3456, v2 // GFX10: v_cmpx_lt_i16_e64 0x3456, v2 ; encoding: [0x00,0x00,0x99,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_lt_i16_e64 0x3456, 0x3456 // GFX10: v_cmpx_lt_i16_e64 0x3456, 0x3456 ; encoding: [0x00,0x00,0x99,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_f_i64_e64 0xaf123456, v[2:3] // GFX10: v_cmpx_f_i64_e64 0xaf123456, v[2:3] ; encoding: [0x00,0x00,0xb0,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_f_i64_e64 v[1:2], 0x3f717273 // GFX10: v_cmpx_f_i64_e64 v[1:2], 0x3f717273 ; encoding: [0x00,0x00,0xb0,0xd4,0x01,0xff,0x01,0x00,0x73,0x72,0x71,0x3f] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_f_i64_e64 0x3f717273, 0x3f717273 // GFX10: v_cmpx_f_i64_e64 0x3f717273, 0x3f717273 ; encoding: [0x00,0x00,0xb0,0xd4,0xff,0xfe,0x01,0x00,0x73,0x72,0x71,0x3f] +// GFX9-ERR: error: 
instruction not supported on this GPU v_lshlrev_b64 v[5:6], 0xaf123456, v[2:3] // GFX10: v_lshlrev_b64 v[5:6], 0xaf123456, v[2:3] ; encoding: [0x05,0x00,0xff,0xd6,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] @@ -307,18 +321,23 @@ v_lshlrev_b64 v[5:6], v1, 0x3f717273 v_fma_mix_f32 v5, 0x123, v2, v3 // GFX10: v_fma_mix_f32 v5, 0x123, v2, v3 ; encoding: [0x05,0x00,0x20,0xcc,0xff,0x04,0x0e,0x04,0x23,0x01,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, v1, 0x7b, v3 // GFX10: v_fma_mix_f32 v5, v1, 0x7b, v3 ; encoding: [0x05,0x00,0x20,0xcc,0x01,0xff,0x0d,0x04,0x7b,0x00,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, v1, v2, 0x1c8 // GFX10: v_fma_mix_f32 v5, v1, v2, 0x1c8 ; encoding: [0x05,0x00,0x20,0xcc,0x01,0x05,0xfe,0x03,0xc8,0x01,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, 0x1c8a, v2, 0x1c8a // GFX10: v_fma_mix_f32 v5, 0x1c8a, v2, 0x1c8a ; encoding: [0x05,0x00,0x20,0xcc,0xff,0x04,0xfe,0x03,0x8a,0x1c,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, 0x1c8a, 0x1c8a, 0x1c8a // GFX10: v_fma_mix_f32 v5, 0x1c8a, 0x1c8a, 0x1c8a ; encoding: [0x05,0x00,0x20,0xcc,0xff,0xfe,0xfd,0x03,0x8a,0x1c,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_pk_add_f16 v5, 0xaf123456, v2 // GFX10: v_pk_add_f16 v5, 0xaf123456, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/vop3-modifiers-err.s b/llvm/test/MC/AMDGPU/vop3-modifiers-err.s index b28768c1ca09f..95811c789e844 100644 --- a/llvm/test/MC/AMDGPU/vop3-modifiers-err.s +++ b/llvm/test/MC/AMDGPU/vop3-modifiers-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s //---------------------------------------------------------------------------// // VOP3 Modifiers 
diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index 2e90817677404..e5ff3f030a6fc 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -1,14 +1,14 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI // Make sure interp instructions disassemble regardless of lds bank count // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI - +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck %s -check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // VOPC Instructions @@ -287,39 +287,42 @@ v_mac_f32_e64 v0, -v1, |v2| // VI: v_mac_f32_e64 v0, -v1, |v2| ; encoding: [0x00,0x02,0x16,0xd1,0x01,0x05,0x02,0x20] v_mac_f16_e64 v0, 0.5, flat_scratch_lo -// NOSICI: error: // VI: v_mac_f16_e64 v0, 0.5, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf0,0xcc,0x00,0x00] +// NOCI: 
error: instruction not supported on this GPU +// NOSI: error: not a valid operand. v_mac_f16_e64 v0, -4.0, flat_scratch_lo -// NOSICI: error: // VI: v_mac_f16_e64 v0, -4.0, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf7,0xcc,0x00,0x00] +// NOCI: error: instruction not supported on this GPU +// NOSI: error: not a valid operand. v_mac_f16_e64 v0, flat_scratch_lo, -4.0 -// NOSICI: error: // VI: v_mac_f16_e64 v0, flat_scratch_lo, -4.0 ; encoding: [0x00,0x00,0x23,0xd1,0x66,0xee,0x01,0x00] +// NOCI: error: instruction not supported on this GPU +// NOSI: error: not a valid operand. v_add_u32 v84, vcc, v13, s31 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_add_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x19,0xd1,0x0d,0x3f,0x00,0x00] v_sub_u32 v84, s[2:3], v13, s31 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_sub_u32_e64 v84, s[2:3], v13, s31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x00,0x00] v_subrev_u32 v84, vcc, v13, s31 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_subrev_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x00,0x00] v_addc_u32 v84, s[4:5], v13, v31, vcc clamp -// NOSICI: error: +// NOSICI: error: integer clamping is not supported on this GPU // VI: v_addc_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] v_subb_u32 v84, s[2:3], v13, v31, vcc clamp -// NOSICI: error: +// NOSICI: error: integer clamping is not supported on this GPU // VI: v_subb_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] v_subbrev_u32 v84, vcc, v13, v31, s[6:7] clamp -// NOSICI: error: +// NOSICI: error: integer clamping is not supported on this GPU // VI: v_subbrev_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] 
///===---------------------------------------------------------------------===// @@ -493,81 +496,107 @@ v_cubeid_f32 v0, |-1|, |-1.0|, |1.0| v_fma_f16_e64 v5, v1, v2, v3 // VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, 0.5 // VI: v_fma_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, -v1, -v2, -v3 // VI: v_fma_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0xe4] +// NOSICI: error: not a valid operand. v_fma_f16 v5, |v1|, |v2|, |v3| // VI: v_fma_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_fma_f16 v5, v1, v2, v3 clamp // VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_div_fixup_f16_e64 v5, v1, v2, v3 // VI: v_div_fixup_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, 0.5, v2, v3 // VI: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, 0.5, v3 // VI: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, 0.5 // VI: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, -4.0 // VI: v_div_fixup_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xde,0x03] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, -v1, v2, v3 // VI: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: 
[0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, v1, |v2|, v3 // VI: v_div_fixup_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, v1, v2, v3 clamp // VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_f16_e64 v5, v1, v2, v3 // VI: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, 0.5, v2, v3 // VI: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, 0.5, v3 // VI: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, 0.5 // VI: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, -v2, v3 // VI: v_mad_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] +// NOSICI: error: not a valid operand. v_mad_f16 v5, v1, v2, |v3| // VI: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
v_mad_f16 v5, v1, v2, v3 clamp // VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_i16_e64 v5, -1, v2, v3 // VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, -4.0, v3 // NOVI: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, 0 // VI: v_mad_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0x02,0x02] +// NOSICI: error: instruction not supported on this GPU v_mad_u16_e64 v5, -1, v2, v3 // VI: v_mad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0xc1,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, 0, v3 // VI: v_mad_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x01,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, -4.0 // NOVI: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU ///===---------------------------------------------------------------------===// // VOP3 with Integer Clamp @@ -606,19 +635,21 @@ v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp // VI: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04] v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp -// NOSICI: error: // VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04] +// NOCI: error: integer clamping is not supported on this GPU +// NOSI: error: invalid operand for instruction v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp -// NOSICI: error: // VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04] +// NOCI: error: integer clamping is not supported on this GPU +// NOSI: error: invalid operand for instruction v_mad_u16 v5, v1, 
v2, v3 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04] v_mad_i16 v5, v1, v2, v3 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04] // diff --git a/llvm/test/MC/AMDGPU/vop3p-err.s b/llvm/test/MC/AMDGPU/vop3p-err.s index 9dfd28a4b9f94..614a348ae133f 100644 --- a/llvm/test/MC/AMDGPU/vop3p-err.s +++ b/llvm/test/MC/AMDGPU/vop3p-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9 --implicit-check-not=error: %s // GFX9: 25: error: invalid operand for instruction v_pk_add_u16 v1, v2, v3 op_sel @@ -15,7 +15,7 @@ v_pk_add_u16 v1, v2, v3 op_sel:[] // GFX9: 33: error: unknown token in expression v_pk_add_u16 v1, v2, v3 op_sel:[,] -// XXGFX9: 34: error: failed parsing operand. +// FIXME: Should trigger an error. // v_pk_add_u16 v1, v2, v3 op_sel:[0] // GFX9: 35: error: expected a comma @@ -51,14 +51,14 @@ v_pk_add_u16 v1, v2, v3 op_sel:[0,-1] // GFX9: 40: error: expected a closing square bracket v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0,0] -// XXGFX9: invalid operand for instruction +// FIXME: should trigger an error v_pk_add_u16 v1, v2, v3 neg_lo:[0,0] // // Regular modifiers on packed instructions // -// FIXME: should be invalid operand for instruction +// FIXME: should be "invalid operand for instruction" // GFX9: :18: error: not a valid operand. 
v_pk_add_f16 v1, |v2|, v3 @@ -87,5 +87,5 @@ v_pk_add_u16 v1, -v2, v3 // Constant bus restrictions // -// GFX9: invalid operand (violates constant bus restrictions) +// GFX9: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, s1, s2 diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s index c059b80fd6de9..e0dfc255a89a1 100644 --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -1,61 +1,61 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=VI9 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=VI9 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: 
//===----------------------------------------------------------------------===// // Check dpp_ctrl values //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff] v_mov_b32 v0, v0 quad_perm:[0,2,1,1] -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff] v_mov_b32 v0, v0 row_shl:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff] v_mov_b32 v0, v0 row_shr:0xf -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff] v_mov_b32 v0, v0 row_ror:0xc -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff] v_mov_b32 v0, v0 wave_shl:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff] v_mov_b32 v0, v0 wave_rol:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff] v_mov_b32 v0, v0 wave_shr:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff] v_mov_b32 v0, v0 wave_ror:1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI9: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff] v_mov_b32 v0, v0 row_mirror -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI9: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff] v_mov_b32 v0, v0 row_half_mirror -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff] v_mov_b32 v0, v0 row_bcast:15 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff] v_mov_b32 v0, v0 row_bcast:31 @@ -63,31 +63,31 @@ v_mov_b32 v0, v0 row_bcast:31 // Check optional fields //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0 @@ -95,19 +95,19 @@ v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0 // Check modifiers //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1] v_add_f32 v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1] v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1] v_add_f32 v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1] v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 @@ -115,242 +115,244 @@ v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // Check VOP1 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. +// NOVI: error: not a valid operand. v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1] v_cvt_u32_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1] v_fract_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1] v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x02,0x7e,0x00,0x01,0x09,0xa1] v_mov_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cvt_f32_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x10,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f16_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x14,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f16_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x16,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_rpi_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x18,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_rpi_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_flr_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_flr_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cvt_off_f32_i4_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_off_f32_i4 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte0_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x22,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte0 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte1_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x24,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte1 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte2_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x26,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte2 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte3_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x28,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte3 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_trunc_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x38,0x02,0x7e,0x00,0x01,0x09,0xa1] v_trunc_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ceil_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ceil_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_rndne_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rndne_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_floor_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_floor_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_exp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x40,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_log_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x42,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rcp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x44,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rcp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rcp_iflag_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x46,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rcp_iflag_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rsq_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x48,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rsq_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_sqrt_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x4e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sqrt_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cos_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x54,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cos_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_not_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x56,0x02,0x7e,0x00,0x01,0x09,0xa1] v_not_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_bfrev_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x58,0x02,0x7e,0x00,0x01,0x09,0xa1] v_bfrev_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ffbh_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ffbh_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ffbl_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ffbl_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ffbh_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ffbh_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_frexp_exp_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x66,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_exp_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_frexp_mant_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x68,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_mant_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_log_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x98,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_exp_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x96,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f16_u16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x72,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f16_u16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f16_i16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x74,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f16_i16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_u16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x76,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_u16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cvt_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x78,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rcp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rcp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sqrt_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sqrt_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rsq_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rsq_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_log_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x80,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_exp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x82,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_frexp_mant_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_mant_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_frexp_exp_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_exp_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_floor_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x88,0x02,0x7e,0x00,0x01,0x09,0xa1] v_floor_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ceil_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ceil_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_trunc_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_trunc_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rndne_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rndne_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_fract_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x90,0x02,0x7e,0x00,0x01,0x09,0xa1] v_fract_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sin_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x92,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sin_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cos_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x94,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cos_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // GFX9: v_cvt_norm_i16_f16_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9a,0x0a,0x7e,0x01,0xe4,0x20,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_i16_f16_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX9: v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX9: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9e,0x0a,0x7e,0x01,0x2f,0x01,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0xe4,0x08,0x00] v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 @@ -359,239 +361,239 @@ v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask //===----------------------------------------------------------------------===// // ToDo: VOP2bInst instructions: v_add_u32, v_sub_u32 ... (vcc and ApplyMnemonic in AsmMatcherEmitter.cpp) -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_mac_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x01,0x01,0xff] v_mac_f32 v0, v0, v0 row_shl:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mac_f32_dpp v0, v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x1f,0x01,0xff] v_mac_f32 v0, v0, v0 row_shr:0xf -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mac_f32_dpp v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x4d,0x08,0xaf] v_mac_f32 v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1] v_add_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1] v_min_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1] v_and_b32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0c,0x02,0x01,0x09,0xa1] v_mul_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_sub_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x04,0x02,0x01,0x09,0xa1] v_sub_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_subrev_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x06,0x02,0x01,0x09,0xa1] v_subrev_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0a,0x02,0x01,0x09,0xa1] v_mul_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_hi_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0e,0x02,0x01,0x09,0xa1] v_mul_hi_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x10,0x02,0x01,0x09,0xa1] v_mul_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_hi_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x12,0x02,0x01,0x09,0xa1] v_mul_hi_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x16,0x02,0x01,0x09,0xa1] v_max_f32 v1, v2 v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_min_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x18,0x02,0x01,0x09,0xa1] v_min_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1a,0x02,0x01,0x09,0xa1] v_max_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1c,0x02,0x01,0x09,0xa1] v_min_u32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1e,0x02,0x01,0x09,0xa1] v_max_u32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshrrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x20,0x02,0x01,0x09,0xa1] v_lshrrev_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ashrrev_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x22,0x02,0x01,0x09,0xa1] v_ashrrev_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshlrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x24,0x02,0x01,0x09,0xa1] v_lshlrev_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_or_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x28,0x02,0x01,0x09,0xa1] v_or_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_xor_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x2a,0x02,0x01,0x09,0xa1] v_xor_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3e,0x02,0x01,0x09,0xa1] v_add_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sub_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x40,0x02,0x01,0x09,0xa1] v_sub_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_subrev_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x42,0x02,0x01,0x09,0xa1] v_subrev_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x44,0x02,0x01,0x09,0xa1] v_mul_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1] v_mac_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_add_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4c,0x02,0x01,0x09,0xa1] v_add_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sub_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4e,0x02,0x01,0x09,0xa1] v_sub_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_subrev_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x50,0x02,0x01,0x09,0xa1] v_subrev_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_lo_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x52,0x02,0x01,0x09,0xa1] v_mul_lo_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshlrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x54,0x02,0x01,0x09,0xa1] v_lshlrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshrrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x56,0x02,0x01,0x09,0xa1] v_lshrrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ashrrev_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x58,0x02,0x01,0x09,0xa1] v_ashrrev_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_max_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5a,0x02,0x01,0x09,0xa1] v_max_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5c,0x02,0x01,0x09,0xa1] v_min_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5e,0x02,0x01,0x09,0xa1] v_max_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x60,0x02,0x01,0x09,0xa1] v_max_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x62,0x02,0x01,0x09,0xa1] v_min_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x64,0x02,0x01,0x09,0xa1] v_min_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ldexp_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x66,0x02,0x01,0x09,0xa1] v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
// VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1] v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1] v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1] v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1] v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1] v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
// VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1] v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1] v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1] v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1] v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_addc_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1] v_addc_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_subb_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1] v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1] v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error +// NOSICI: error: not a valid operand. // VI9: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00] v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// NOSICI: error +// NOSICI: error: not a valid operand. // VI9: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00] v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s index a0c71253df810..88386e046917f 100644 --- a/llvm/test/MC/AMDGPU/vop_sdwa.s +++ b/llvm/test/MC/AMDGPU/vop_sdwa.s @@ -1,41 +1,41 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=GFX89 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc 
-arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX89 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --check-prefix=NOGFX89 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOCI,NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX89 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --check-prefix=NOGFX89 --implicit-check-not=error: //---------------------------------------------------------------------------// // Check SDWA operands //---------------------------------------------------------------------------// -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x00] v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x00] v_mov_b32 v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x00] v_mov_b32 v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02] v_min_u32 v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05] v_min_u32 v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06] v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06] v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -43,43 +43,43 @@ v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_se // Check optional operands //---------------------------------------------------------------------------// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_u32_f32_sdwa v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x0e,0x00,0x7e,0x00,0x36,0x06,0x00] v_cvt_u32_f32 v0, v0 clamp dst_sel:DWORD -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_fract_f32_sdwa v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x26,0x06,0x00] v_fract_f32 v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD -// NOSICI: error: +// NOSICI: error: invalid operand for 
instruction // GFX89: v_sin_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x05,0x00] v_sin_f32 v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v1, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x36,0x05,0x00] v_mov_b32 v1, v0 clamp src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_trunc_f32_sdwa v1, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x38,0x02,0x7e,0x00,0x36,0x05,0x00] v_trunc_f32 v1, v0 clamp dst_sel:DWORD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x06,0x00] v_mov_b32_sdwa v1, v0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x06] v_add_f32_sdwa v0, v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_min_f32_sdwa v0, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x36,0x06,0x02] v_min_f32 v0, v0, v0 clamp dst_sel:DWORD src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x06,0x02] v_and_b32 v0, v0, v0 dst_unused:UNUSED_PAD src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_i32_i24_sdwa v1, v2, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x36,0x06,0x06] v_mul_i32_i24_sdwa v1, v2, v3 clamp @@ -87,31 +87,31 @@ v_mul_i32_i24_sdwa v1, v2, v3 clamp // Check modifiers //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_fract_f32_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x25,0x00] v_fract_f32 v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sin_f32_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x35,0x00] v_sin_f32 v0, -abs(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_add_f32_sdwa v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x35,0x12] v_add_f32 v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_min_f32_sdwa v0, |v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x25,0x12] v_min_f32 v0, abs(v0), -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_mov_b32_sdwa v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x0e,0x00] v_mov_b32_sdwa v1, sext(v0) -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_and_b32_sdwa v0, sext(v0), sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x0e,0x0a] v_and_b32 v0, sext(v0), sext(v0) dst_unused:UNUSED_PAD src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_class_f32 vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] // GFX9: v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 @@ -120,477 +120,479 @@ v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 // Check VOP1 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: instruction not supported on this GPU // GFX89: v_nop ; encoding: [0xf9,0x00,0x00,0x7e,0x00,0x00,0x00,0x00] v_nop_sdwa -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_u32_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0e,0x00,0x7e,0x00,0x06,0x05,0x00] v_cvt_u32_f32 v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_fract_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x05,0x00] v_fract_f32 v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sin_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x05,0x00] 
v_sin_f32 v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x06,0x05,0x00] v_mov_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_i32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0a,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_i32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0c,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_u32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x10,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f16_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x14,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x16,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_rpi_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x18,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_rpi_i32_f32 v1, 
v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_flr_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x1a,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_flr_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_off_f32_i4_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x1c,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_off_f32_i4 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x22,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte0 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte1_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x24,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte1 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte2_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x26,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte2 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte3_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x28,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte3 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_trunc_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: 
[0xf9,0x38,0x02,0x7e,0x00,0x06,0x05,0x00] v_trunc_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_ceil_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ceil_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rndne_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3c,0x02,0x7e,0x00,0x06,0x05,0x00] v_rndne_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_floor_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3e,0x02,0x7e,0x00,0x06,0x05,0x00] v_floor_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_exp_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x40,0x02,0x7e,0x00,0x06,0x05,0x00] v_exp_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_log_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x42,0x02,0x7e,0x00,0x06,0x05,0x00] v_log_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rcp_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x44,0x02,0x7e,0x00,0x06,0x05,0x00] v_rcp_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rcp_iflag_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x46,0x02,0x7e,0x00,0x06,0x05,0x00] 
v_rcp_iflag_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rsq_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x48,0x02,0x7e,0x00,0x06,0x05,0x00] v_rsq_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sqrt_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x4e,0x02,0x7e,0x00,0x06,0x05,0x00] v_sqrt_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cos_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x54,0x02,0x7e,0x00,0x06,0x05,0x00] v_cos_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_not_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x56,0x02,0x7e,0x00,0x06,0x05,0x00] v_not_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_bfrev_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x58,0x02,0x7e,0x00,0x06,0x05,0x00] v_bfrev_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ffbh_u32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_ffbl_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5c,0x02,0x7e,0x00,0x06,0x05,0x00] v_ffbl_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ffbh_i32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5e,0x02,0x7e,0x00,0x06,0x05,0x00] v_ffbh_i32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_frexp_exp_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x66,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_exp_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_frexp_mant_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x68,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_mant_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: // GFX89: v_log_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x98,0x02,0x7e,0x00,0x06,0x05,0x00] +// NOSI: error: not a valid operand. +// NOCI: error: invalid operand for instruction v_log_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: // GFX89: v_exp_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x96,0x02,0x7e,0x00,0x06,0x05,0x00] +// NOSI: error: not a valid operand. +// NOCI: error: invalid operand for instruction v_exp_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_cvt_f16_u16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x72,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_u16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cvt_f16_i16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x74,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_i16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cvt_u16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x76,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_u16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cvt_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x78,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7a,0x02,0x7e,0x00,0x06,0x05,0x00] v_rcp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_sqrt_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7c,0x02,0x7e,0x00,0x06,0x05,0x00] v_sqrt_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_rsq_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7e,0x02,0x7e,0x00,0x06,0x05,0x00] v_rsq_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x80,0x02,0x7e,0x00,0x06,0x05,0x00] v_log_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x82,0x02,0x7e,0x00,0x06,0x05,0x00] v_exp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_frexp_mant_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x84,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_mant_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x86,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_exp_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_floor_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x88,0x02,0x7e,0x00,0x06,0x05,0x00] v_floor_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ceil_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ceil_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_trunc_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8c,0x02,0x7e,0x00,0x06,0x05,0x00] v_trunc_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8e,0x02,0x7e,0x00,0x06,0x05,0x00] v_rndne_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_fract_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x90,0x02,0x7e,0x00,0x06,0x05,0x00] v_fract_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_sin_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x92,0x02,0x7e,0x00,0x06,0x05,0x00] v_sin_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cos_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x94,0x02,0x7e,0x00,0x06,0x05,0x00] v_cos_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX9: v_cvt_norm_i16_f16_sdwa v5, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9a,0x0a,0x7e,0x01,0x06,0x16,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_i16_f16_sdwa v5, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // GFX9: v_cvt_norm_i16_f16_sdwa v5, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9a,0x0a,0x7e,0x01,0x06,0x26,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_i16_f16_sdwa v5, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // GFX9: v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x9c,0x0a,0x7e,0x01,0x16,0x06,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD // GFX9: v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x9c,0x0a,0x7e,0x01,0x06,0x05,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX9: v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9e,0x0a,0x7e,0x01,0x06,0x0e,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD //===----------------------------------------------------------------------===// // Check VOP2 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x02] v_add_f32 v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_min_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x05,0x02] v_min_f32 v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x05,0x02] v_and_b32 v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x06,0x05,0x02] v_mul_i32_i24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sub_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x04,0x02,0x06,0x05,0x02] v_sub_f32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_subrev_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x06,0x02,0x06,0x05,0x02] v_subrev_f32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0a,0x02,0x06,0x05,0x02] v_mul_f32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_mul_hi_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0e,0x02,0x06,0x05,0x02] v_mul_hi_i32_i24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_u32_u24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x10,0x02,0x06,0x05,0x02] v_mul_u32_u24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mul_hi_u32_u24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x12,0x02,0x06,0x05,0x02] v_mul_hi_u32_u24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_max_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x16,0x02,0x06,0x05,0x02] v_max_f32 v1, v2 v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x18,0x02,0x06,0x05,0x02] v_min_i32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1a,0x02,0x06,0x05,0x02] v_max_i32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_min_u32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1c,0x02,0x06,0x05,0x02] v_min_u32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_u32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1e,0x02,0x06,0x05,0x02] v_max_u32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshrrev_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x20,0x02,0x06,0x05,0x02] v_lshrrev_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ashrrev_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x22,0x02,0x06,0x05,0x02] v_ashrrev_i32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshlrev_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x24,0x02,0x06,0x05,0x02] v_lshlrev_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x28,0x02,0x06,0x05,0x02] v_or_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_xor_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x2a,0x02,0x06,0x05,0x02] v_xor_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_add_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3e,0x02,0x06,0x05,0x02] v_add_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_sub_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x40,0x02,0x06,0x05,0x02] v_sub_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_subrev_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x42,0x02,0x06,0x05,0x02] v_subrev_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mul_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x44,0x02,0x06,0x05,0x02] v_mul_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_add_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4c,0x02,0x06,0x05,0x02] v_add_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_sub_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4e,0x02,0x06,0x05,0x02] v_sub_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_subrev_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x50,0x02,0x06,0x05,0x02] v_subrev_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mul_lo_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x52,0x02,0x06,0x05,0x02] v_mul_lo_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshlrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x54,0x02,0x06,0x05,0x02] v_lshlrev_b16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshrrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x56,0x02,0x06,0x05,0x02] v_lshrrev_b16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ashrrev_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x58,0x02,0x06,0x05,0x02] v_ashrrev_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_max_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5a,0x02,0x06,0x05,0x02] v_max_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5c,0x02,0x06,0x05,0x02] v_min_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5e,0x02,0x06,0x05,0x02] v_max_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x60,0x02,0x06,0x05,0x02] v_max_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x62,0x02,0x06,0x05,0x02] v_min_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x64,0x02,0x06,0x05,0x02] v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_ldexp_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x66,0x02,0x06,0x05,0x02] v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. // VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02] v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. 
// VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02] v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. // VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: not a valid operand. // GFX9: v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: not a valid operand. // GFX9: v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: not a valid operand. // GFX9: v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02] v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02] v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error +// NOSICI: error: not a valid operand. // GFX89: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06] v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06] v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error +// NOSICI: error: not a valid operand. 
// GFX89: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e] v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD @@ -603,72 +605,72 @@ v_cndmask_b32_sdwa v5, vcc_lo, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE // Check VOPC opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmp_eq_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmp_nle_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmpx_gt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmpx_nlt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// 
NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_lt_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_t_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_eq_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_ne_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_f_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI: v_cmp_gt_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_ne_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI: v_cmpx_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 @@ -681,22 +683,22 @@ v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 // v_mac_f16/f32 is prohibited //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mac_f32_sdwa v3, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x0a,0x06,0x2c,0x04,0x16,0x05,0x06] // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v3, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mac_f32_sdwa v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 src1_sel:DWORD ; encoding: [0xf9,0x84,0x1f,0x2c,0x63,0x0e,0x04,0x06] // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v194, v13, v1 dst_sel:BYTE_0 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI: v_mac_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x46,0x02,0x06,0x05,0x02] // NOGFX9: error: instruction not supported on this GPU v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -705,312 +707,318 @@ v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_se // Scalar registers are allowed //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x86,0x00] v_mov_b32 v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x7e,0x10,0x86,0x00] v_mov_b32 v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x78,0x10,0x86,0x00] v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x85,0x02] v_add_f32 v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v0, v0, s22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x00,0x02,0x00,0x06,0x05,0x82] v_add_f32 v0, v0, s22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +// NOGFX9: error: invalid operand for instruction // NO: invalid operand (violates constant bus restrictions) v_add_f32 v0, exec_lo, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. // NO: error: not a valid operand v_add_f32 v0, v1, tba_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. 
// NO: error: not a valid operand v_add_f32 v0, v1, tma_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x85,0x02] v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xf8,0x05,0x02] v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU +// NOGFX9: error: not a valid operand. // NO: error: not a valid operand v_cmp_eq_f32_sdwa tba, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU +// NOGFX9: error: not a valid operand. // NO: error: not a valid operand v_cmp_eq_f32_sdwa tma, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0xf6,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32_sdwa vcc, exec_lo, vcc_lo src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOVI: error: invalid operand for instruction // GFX9: v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0x66,0x06,0x86,0x00] +// NOSI: error: not a valid operand. +// NOCI: error: not a valid operand. v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD //===----------------------------------------------------------------------===// // Inline constants are allowed (though semantics is not clear yet) //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0x80,0x06,0x86,0x00] v_mov_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xc1,0x06,0x86,0x00] v_mov_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xf0,0x06,0x86,0x00] v_mov_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xf7,0x06,0x86,0x00] v_mov_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xc1,0x16,0x8e,0x00] v_mov_b32_sdwa v5, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x06,0x86,0x06] v_add_f32_sdwa v5, -1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, |-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0xa6,0x06] v_add_f32_sdwa v5, |-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, neg(-1), -|v2| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: 
[0xf9,0x04,0x0a,0x02,0xc1,0x16,0x96,0x36] v_add_f32_sdwa v5, neg(-1), -|v2| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -|-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0xb6,0x06] v_add_f32_sdwa v5, -|-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, 0.5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0x86,0x06] v_add_f32_sdwa v5, 0.5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, |-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0xa6,0x06] v_add_f32_sdwa v5, |-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, neg(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0x96,0x06] v_add_f32_sdwa v5, neg(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -|-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: 
[0xf9,0x04,0x0a,0x02,0xf7,0x16,0xb6,0x06] v_add_f32_sdwa v5, -|-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -4.0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0x86] v_add_f32_sdwa v5, v2, -4.0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, |-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0xa6] v_add_f32_sdwa v5, v2, |-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, neg(-4.0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0x96] v_add_f32_sdwa v5, v2, neg(-4.0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -|-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0xb6] v_add_f32_sdwa v5, v2, -|-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; 
encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0x86] v_add_f32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0xa6] v_add_f32_sdwa v5, v2, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0x96] v_add_f32_sdwa v5, v2, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0xb6] v_add_f32_sdwa v5, v2, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x26,0xf7,0x16,0x86,0x06] v_and_b32_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x26,0xf7,0x16,0x8e,0x06] v_and_b32_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x26,0x02,0x16,0x06,0x86] v_and_b32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x26,0x02,0x16,0x06,0x8e] v_and_b32_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0x86,0x00] v_exp_f16_sdwa v5, -1 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0xa6,0x00] v_exp_f16_sdwa v5, |-1| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0x96,0x00] v_exp_f16_sdwa v5, neg(-1) -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0xb6,0x00] v_exp_f16_sdwa v5, -|-1| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0x86,0x00] v_exp_f16_sdwa v5, 0.5 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, |0.5| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0xa6,0x00] v_exp_f16_sdwa v5, |0.5| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, neg(0.5) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0x96,0x00] v_exp_f16_sdwa v5, neg(0.5) -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -|0.5| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0xb6,0x00] v_exp_f16_sdwa v5, -|0.5| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_max_i16_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x60,0x02,0x16,0x06,0x86] v_max_i16_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_max_i16_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x60,0x02,0x16,0x06,0x8e] v_max_i16_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], -4.0, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0x86,0x06] v_cmp_eq_f32_sdwa s[6:7], -4.0, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], |-4.0|, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0xa6,0x06] v_cmp_eq_f32_sdwa s[6:7], |-4.0|, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for 
instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], neg(-4.0), v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0x96,0x06] v_cmp_eq_f32_sdwa s[6:7], neg(-4.0), v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], -|-4.0|, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0xb6,0x06] v_cmp_eq_f32_sdwa s[6:7], -|-4.0|, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, -1 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0x86] v_cmp_eq_f32_sdwa s[6:7], v2, -1 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, |-1| src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0xa6] v_cmp_eq_f32_sdwa s[6:7], v2, |-1| src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, neg(-1) src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0x96] v_cmp_eq_f32_sdwa s[6:7], v2, neg(-1) src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0xb6] v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD src1_sel:DWORD @@ -1018,19 +1026,19 @@ v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD 
src1_sel:DWORD // Literals are not allowed //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction v_add_f32 v0, v1, 3.45 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOGFX89: error: invalid operand for instruction v_cmpx_class_f32 vcc, v1, 200 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOGFX89: error: invalid operand for instruction v_cmpx_class_f32 vcc, 200, v1 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOGFX89: error: invalid operand for instruction v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD @@ -1038,18 +1046,18 @@ v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // VOPC with arbitrary SGPR destination //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x82,0x05,0x02] v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xfe,0x05,0x02] v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x02,0xfe,0x85,0x02] v_cmp_eq_f32_sdwa exec, 
s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1057,23 +1065,23 @@ v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // OMod output modifier allowed //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_trunc_f32_sdwa v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0x50,0x06,0x00] v_trunc_f32 v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_trunc_f32_sdwa v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0xf0,0x06,0x00] v_trunc_f32 v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_add_f32_sdwa v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x46,0x05,0x02] v_add_f32 v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_add_f32_sdwa v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0xe6,0x05,0x02] v_add_f32 v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1081,8 +1089,8 @@ v_add_f32 v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WO // Check Instructions 
//---------------------------------------------------------------------------// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_screen_partition_4se_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 ; encoding: [0xf9,0x6e,0x0a,0x7e,0x01,0x16,0x00,0x00] v_screen_partition_4se_b32_sdwa v5, v1 src0_sel:BYTE_0 diff --git a/llvm/test/MC/AMDGPU/vopc-errs.s b/llvm/test/MC/AMDGPU/vopc-errs.s index bc8902f051ad7..4998aebe0b04b 100644 --- a/llvm/test/MC/AMDGPU/vopc-errs.s +++ b/llvm/test/MC/AMDGPU/vopc-errs.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s // Force 32-bit encoding with non-vcc result diff --git a/llvm/test/MC/AMDGPU/vopc-vi.s b/llvm/test/MC/AMDGPU/vopc-vi.s index f79923dfbd2ec..f4c796528200f 100644 --- a/llvm/test/MC/AMDGPU/vopc-vi.s +++ b/llvm/test/MC/AMDGPU/vopc-vi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s - // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOSICI --implicit-check-not=error: %s v_cmp_class_f16 vcc, v2, v4 // VI: v_cmp_class_f16_e32 vcc, v2, v4 
; encoding: [0x02,0x09,0x28,0x7c] diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s index b9532aebd1579..b9f6af4b28169 100644 --- a/llvm/test/MC/AMDGPU/wave32.s +++ b/llvm/test/MC/AMDGPU/wave32.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1032 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1064 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR --implicit-check-not=error: %s v_cmp_ge_i32_e32 s0, v0 // GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d] diff --git a/llvm/test/MC/AMDGPU/xdl-insts-err.s b/llvm/test/MC/AMDGPU/xdl-insts-err.s index 8f596bea7aad0..d774260bf941c 100644 --- a/llvm/test/MC/AMDGPU/xdl-insts-err.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX906-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX908-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX906-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX908-ERR --implicit-check-not=error: %s 
// GFX906-ERR: error: instruction not supported on this GPU v_dot2c_f32_f16 v0, v1, v2 diff --git a/llvm/test/MC/AMDGPU/xnack-mask.s b/llvm/test/MC/AMDGPU/xnack-mask.s index c88a8c2985070..0fa5242d37899 100644 --- a/llvm/test/MC/AMDGPU/xnack-mask.s +++ b/llvm/test/MC/AMDGPU/xnack-mask.s @@ -1,9 +1,9 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1001 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1001 %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s 2>&1 | FileCheck -check-prefix=XNACKERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney %s 2>&1 | FileCheck -check-prefix=XNACKERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s | FileCheck -check-prefix=XNACK %s s_mov_b64 xnack_mask, -1 diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index fced0be79d389..8948bf92b7d76 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -93,7 +93,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
011101030E1305110155170000022E00030E110112060000032E00030E11011201000000 - sectname: __debug_info segname: __DWARF addr: 0x0000000000000024 @@ -106,7 +105,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 520000000400000000000801010000000400000000000000000000000000020D000000000000000000000020000000031700000000000000000000003000000000000000022100000000100000000000000010000000 - sectname: __debug_ranges segname: __DWARF addr: 0x000000000000007A @@ -119,7 +117,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 0000000000000000200000000000000000000000000000003000000000000000FFFFFFFFFFFFFFFF00100000000000000000000000000000001000000000000000000000000000000000000000000000 - sectname: __debug_str segname: __DWARF addr: 0x00000000000000CA @@ -132,7 +129,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 002F746D702F6D61696E2E630073747269707065643100737472697070656432006D61696E00 - cmd: LC_SYMTAB cmdsize: 24 symoff: 0 diff --git a/llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml b/llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml new file mode 100644 index 0000000000000..a4b2b4810c5a5 --- /dev/null +++ b/llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml @@ -0,0 +1,45 @@ +## Test that yaml2obj emits an error message if we specify the DWARF section +## contents both in the 'DWARF' entry and in the 'content'. 
+ +# RUN: not yaml2obj %s 2>&1 | FileCheck %s --check-prefix=CONFLICT + +# CONFLICT: yaml2obj: error: cannot specify section '__debug_str' contents in the 'DWARF' entry and the 'content' at the same time + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_str + segname: __DWARF + addr: 0x00 + size: 12 + offset: 528 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 1234 +DWARF: + debug_str: [ a, abc ] diff --git a/llvm/test/Other/debugcounter-earlycse.ll b/llvm/test/Other/debugcounter-earlycse.ll index 3d0a9cdbd3a23..daaf7c9e4a75f 100644 --- a/llvm/test/Other/debugcounter-earlycse.ll +++ b/llvm/test/Other/debugcounter-earlycse.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse < %s 2>&1 | FileCheck %s +; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse -earlycse-debug-hash < %s 2>&1 | FileCheck %s ;; Test that, with debug counters on, we only optimize the second CSE opportunity. 
define i32 @test(i32 %a, i32 %b) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Other/invariant.group.ll b/llvm/test/Other/invariant.group.ll index 3b2cacedbd947..36f1e33688e42 100644 --- a/llvm/test/Other/invariant.group.ll +++ b/llvm/test/Other/invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt -S -gvn < %s | FileCheck %s ; RUN: opt -S -newgvn < %s | FileCheck %s ; RUN: opt -S -O3 < %s | FileCheck %s @@ -101,4 +101,4 @@ declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) -!0 = !{} \ No newline at end of file +!0 = !{} diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll index 45c8bd55fa014..a156301c1c26b 100644 --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -lint -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes=lint -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64" declare fastcc void @bar() diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 4b7e8eed1f256..e606e7cfac171 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -140,9 +140,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index c3c4b9d6d80cd..aaee6f786bac9 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll 
@@ -145,9 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 26dfee08b2cef..b2d2f85ae21be 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -145,9 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index 3e778ea7e9755..cc91707c4b009 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -126,9 +126,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-bisect-legacy-pass-manager.ll b/llvm/test/Other/opt-bisect-legacy-pass-manager.ll index bf89e80d49604..297f61230c9dd 100644 --- a/llvm/test/Other/opt-bisect-legacy-pass-manager.ll +++ 
b/llvm/test/Other/opt-bisect-legacy-pass-manager.ll @@ -83,14 +83,14 @@ ; Test a function pass. -; RUN: opt -disable-output -disable-verify -early-cse -opt-bisect-limit=-1 \ +; RUN: opt -disable-output -disable-verify -early-cse -earlycse-debug-hash -opt-bisect-limit=-1 \ ; RUN: %s 2>&1 | FileCheck %s --check-prefix=CHECK-EARLY-CSE ; CHECK-EARLY-CSE: BISECT: running pass ({{[0-9]+}}) Early CSE on function (f1) ; CHECK-EARLY-CSE: BISECT: running pass ({{[0-9]+}}) Early CSE on function (f2) ; CHECK-EARLY-CSE: BISECT: running pass ({{[0-9]+}}) Early CSE on function (f3) -; RUN: opt -disable-output -disable-verify -early-cse -opt-bisect-limit=0 %s \ -; RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-EARLY-CSE +; RUN: opt -disable-output -disable-verify -early-cse -earlycse-debug-hash -opt-bisect-limit=0 \ +; RUN: %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-EARLY-CSE ; CHECK-NOT-EARLY-CSE: BISECT: NOT running pass ({{[0-9]+}}) Early CSE on function (f1) ; CHECK-NOT-EARLY-CSE: BISECT: NOT running pass ({{[0-9]+}}) Early CSE on function (f2) ; CHECK-NOT-EARLY-CSE: BISECT: NOT running pass ({{[0-9]+}}) Early CSE on function (f3) diff --git a/llvm/test/Other/print-debug-counter.ll b/llvm/test/Other/print-debug-counter.ll index 3647f39026dcf..846817add4b84 100644 --- a/llvm/test/Other/print-debug-counter.ll +++ b/llvm/test/Other/print-debug-counter.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse \ +; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse -earlycse-debug-hash \ ; RUN: -debug-counter=newgvn-vn-skip=1,newgvn-vn-count=2 -newgvn \ ; RUN: -instcombine -print-debug-counter < %s 2>&1 | FileCheck %s ;; Test debug counter prints correct info in right order. 
diff --git a/llvm/test/ThinLTO/X86/Inputs/import-metadata.ll b/llvm/test/ThinLTO/X86/Inputs/import-metadata.ll new file mode 100644 index 0000000000000..d8be887928a2d --- /dev/null +++ b/llvm/test/ThinLTO/X86/Inputs/import-metadata.ll @@ -0,0 +1,23 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +define i32 @foo(i32 %goo) { +entry: + %goo.addr = alloca i32, align 4 + store i32 %goo, i32* %goo.addr, align 4 + %0 = load i32, i32* %goo.addr, align 4 + %1 = load i32, i32* %goo.addr, align 4 + %mul = mul nsw i32 %0, %1 + ret i32 %mul +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.md = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !4) +!1 = !DIFile(filename: "foo.cpp", directory: "tmp") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{} +!5 = !{!4} diff --git a/llvm/test/ThinLTO/X86/import-metadata.ll b/llvm/test/ThinLTO/X86/import-metadata.ll new file mode 100644 index 0000000000000..f938fdd5c93c9 --- /dev/null +++ b/llvm/test/ThinLTO/X86/import-metadata.ll @@ -0,0 +1,40 @@ +; RUN: opt -thinlto-bc %s -o %t1.bc +; RUN: opt -thinlto-bc %p/Inputs/import-metadata.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t1.bc %t2.bc -o %t-out \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo,l \ +; RUN: -r=%t2.bc,foo,pl +; RUN: llvm-dis %t-out.1.3.import.bc -o - | FileCheck %s + +;; Check the imported DICompileUnit doesn't have the enums operand. +;; Also check the imported md metadata that shares a node with the +;; enums operand originally is not null. + +; CHECK: !llvm.dbg.cu = !{![[#CU1:]], ![[#CU2:]]} +;; Note that MD1 comes from the current module. MD2 is from the imported module. +;; We are checking if the imported MD2 doesn't end up having a null operand. 
+; CHECK: !llvm.md = !{![[#MD1:]], ![[#MD2:]]} +; CHECK: ![[#MD3:]] = !{} +; CHECK: ![[#CU2]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: ![[#FILE2:]], isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) +; CHECK: ![[#MD2]] = !{![[#MD3]]} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +declare i32 @foo(i32 %goo) + +define i32 @main() { + call i32 @foo(i32 0) + ret i32 0 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.md = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !4) +!1 = !DIFile(filename: "main.cpp", directory: "tmp") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{} +!5 = !{!4} diff --git a/llvm/test/Transforms/BDCE/intrinsics.ll b/llvm/test/Transforms/BDCE/intrinsics.ll new file mode 100644 index 0000000000000..5a186f01fd298 --- /dev/null +++ b/llvm/test/Transforms/BDCE/intrinsics.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -bdce < %s | FileCheck %s + +declare i8 @llvm.umax.i8(i8, i8) +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.smax.i8(i8, i8) +declare i8 @llvm.smin.i8(i8, i8) + +define i8 @umax(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @umax( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} + +define i8 @umin(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @umin( +; CHECK-NEXT: [[A2:%.*]] = zext 
i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} + +define i8 @smax(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @smax( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} + +define i8 @smin(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @smin( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll index c1674ad4ca45d..adb1930ca7829 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll +++ 
b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll @@ -87,10 +87,9 @@ define <4 x i32> @global_struct_splat() { define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) { ; CHECK-LABEL: @splat_ptr_gather( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[PTR:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]]) -; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer @@ -100,9 +99,8 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthr define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; CHECK-LABEL: @splat_ptr_scatter( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[PTR:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP2]], i32 4, <4 x i1> [[MASK:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]]) ; CHECK-NEXT: ret void ; %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll index d54fe130d05f6..5aeda18309724 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -68,13 +68,12 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp { ret void } -; According to the current LangRef, memcpy's source and destination cannot -; overlap, hence the first memcpy is dead. -; -; Previously this was not allowed (PR8728), also discussed in PR11763. +; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not +; inequal and overlapping). define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) @@ -84,7 +83,8 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll index 04cdae285d814..763362dd3d479 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -37,10 +37,12 @@ bb3: ret void } - +; We cannot remove the store in the entry block, because @unknown_func could +; unwind and the stored value could be read by the caller. define void @test17(i32* noalias %P) { ; CHECK-LABEL: @test17( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]], align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] ; CHECK: bb1: ; CHECK-NEXT: call void @unknown_func() @@ -84,31 +86,6 @@ bb3: ret void } - -define void @test6(i32* noalias %P) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] -; CHECK-NEXT: ret void -; - store i32 0, i32* %P - br i1 true, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - call void @unknown_func() - br label %bb3 -bb3: - store i32 0, i32* %P - ret void -} - define void @test19(i32* noalias %P) { ; CHECK-LABEL: @test19( ; CHECK-NEXT: entry: @@ -173,71 +150,6 @@ bb3: ret void } - -define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { -; CHECK-LABEL: @test22( -; CHECK-NEXT: store i32 2, i32* [[P:%.*]] -; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] -; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] -; CHECK-NEXT: ret i32 [[L]] -; - store i32 1, i32* %Q - store i32 2, i32* %P - store i32 3, i32* %Q - %l = load i32, i32* %R - ret i32 %l -} - - -define void @test23(i32* noalias %P) { -; CHECK-LABEL: @test23( -; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: 
call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] -; CHECK-NEXT: ret void -; - br i1 true, label %bb1, label %bb2 -bb1: - store i32 0, i32* %P - br label %bb3 -bb2: - call void @unknown_func() - br label %bb3 -bb3: - store i32 0, i32* %P - ret void -} - - -define void @test24(i32* noalias %P) { -; CHECK-LABEL: @test24( -; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] -; CHECK-NEXT: ret void -; - br i1 true, label %bb2, label %bb1 -bb1: - store i32 0, i32* %P - br label %bb3 -bb2: - call void @unknown_func() - br label %bb3 -bb3: - store i32 0, i32* %P - ret void -} - define i8* @test26() { ; CHECK-LABEL: @test26( ; CHECK-NEXT: bb1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll index 93a9a2d999e1a..f6031e86bef07 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @@ -30,21 +29,6 @@ bb3: ret void } -define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { -; CHECK-LABEL: @test22( -; CHECK-NEXT: store i32 2, i32* [[P:%.*]] -; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] -; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] -; CHECK-NEXT: ret i32 [[L]] -; - store i32 1, i32* %Q - store i32 2, i32* %P - store i32 3, i32* %Q - %l = load i32, i32* %R - ret i32 %l -} - - define void @test23(i32* noalias %P) { ; CHECK-LABEL: @test23( ; CHECK-NEXT: br i1 true, 
label [[BB1:%.*]], label [[BB2:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index 5c04e11b4a78e..9f719746f9f17 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -554,10 +554,12 @@ define void @test37_atomic(i8* %P, i8* %Q, i8* %R) { ret void } -; The memmove is dead, because memcpy arguments cannot overlap. +; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not +; inequal and overlapping). define void @test38(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; @@ -566,10 +568,12 @@ define void @test38(i8* %P, i8* %Q, i8* %R) { ret void } -; The memmove is dead, because memcpy arguments cannot overlap. +; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not +; inequal and overlapping). 
define void @test38_atomic(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; @@ -749,3 +753,17 @@ define void @test47_volatile(i32* %P) { store volatile i32 3, i32* %P, align 4 ret void } + +define i32 @test48(i32* %P, i32* noalias %Q, i32* %R) { +; CHECK-LABEL: @test48( +; CHECK-NEXT: store i32 2, i32* [[P:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]], align 4 +; CHECK-NEXT: ret i32 [[L]] +; + store i32 1, i32* %Q + store i32 2, i32* %P + store i32 3, i32* %Q + %l = load i32, i32* %R + ret i32 %l +} diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll similarity index 92% rename from llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll index 25c2d5ffe7f56..826732c07af91 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s ; PR9561 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.8" diff --git 
a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll index 7e46d28a9c47f..3f77349879170 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin" diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll index 665d772d03b91..6837a3dd852f8 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll similarity index 89% rename from 
llvm/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll index 3501b43600168..7b74dde7d6b00 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S -enable-dse-partial-overwrite-tracking | FileCheck %s ; PR28588 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll index 5bb29529c2665..7e8c9ca3bd8a7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare i8* @_Znwj(i32) local_unnamed_addr declare void @foo() readnone diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll index b5d9c40cbdbc3..56334be182918 100644 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll +++ 
b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s define void @write4to7(i32* nocapture %p) { ; CHECK-LABEL: @write4to7( diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll index b6ae657d17e5e..a8f09e3e3b44a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" %struct.vec2 = type { <4 x i32>, <4 x i32> } diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll similarity index 95% rename from llvm/test/Transforms/DeadStoreElimination/PartialStore.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll index 1dd894e6658cc..bb6d024701bac 100644 --- a/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse 
-enable-dse-memoryssa=false -enable-dse-partial-store-merging=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Ensure that the dead store is deleted in this case. It is wholely diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll similarity index 83% rename from llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll index ebcb0c3808a15..4fb271b134585 100644 --- a/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s -; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s +; RUN: opt < %s --data-layout "e" -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s +; RUN: opt < %s --data-layout "E" -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s ; This test used to hit an assertion (see PR41949). 
; diff --git a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll index 6a5f4bb9eb25c..ee56a6df6aab8 100644 --- a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -dse -S | FileCheck %s +; RUN: opt < %s -dse -enable-dse-memoryssa=false -S | FileCheck %s ; Both stores should be emitted because we can't tell if the gather aliases. diff --git a/llvm/test/Transforms/DeadStoreElimination/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/atomic.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll index 29850b7ac27f8..ec1917f82b817 100644 --- a/llvm/test/Transforms/DeadStoreElimination/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll similarity index 95% rename from llvm/test/Transforms/DeadStoreElimination/calloc-store.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll index 
5bd384b033fbc..cfc73f45853d0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/calloc-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare noalias i8* @calloc(i64, i64) diff --git a/llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll index a57693a1da38e..0e98e966ce1db 100644 --- a/llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=false < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" diff --git a/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll similarity index 91% rename from llvm/test/Transforms/DeadStoreElimination/const-pointers.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll index a2218b725cd3b..16be53c7a23b3 100644 --- a/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %t = type { i32 } diff --git 
a/llvm/test/Transforms/DeadStoreElimination/crash.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/crash.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll index ccee7fb8ba58b..d211ff97dac7a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/crash.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin10.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll index b403e3382234d..9df0bad79929a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll similarity index 92% rename from llvm/test/Transforms/DeadStoreElimination/debuginfo.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll index b927965dc4054..fbca29decdece 100644 --- a/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll 
@@ -1,4 +1,4 @@ -; RUN: opt < %s -debugify -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -debugify -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/dominate.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll similarity index 87% rename from llvm/test/Transforms/DeadStoreElimination/dominate.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll index 24dd65e07bbc2..63f1a3ffc8ff5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/dominate.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -disable-output < %s +; RUN: opt -dse -enable-dse-memoryssa=false -disable-output < %s ; test that we don't crash declare void @bar() diff --git a/llvm/test/Transforms/DeadStoreElimination/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/fence.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll index 0ef29b0bd7222..ce2dcd3236b74 100644 --- a/llvm/test/Transforms/DeadStoreElimination/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s ; We conservative choose to prevent dead store elimination ; across release or stronger fences. 
It's not required diff --git a/llvm/test/Transforms/DeadStoreElimination/free.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll similarity index 94% rename from llvm/test/Transforms/DeadStoreElimination/free.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll index 66ccc7b4f47b5..275b7e372f111 100644 --- a/llvm/test/Transforms/DeadStoreElimination/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/inst-limits.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll index e9e46df402e9d..6e5f4c150459d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/inst-limits.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; If there are two stores to the same location, DSE should be able to remove diff --git a/llvm/test/Transforms/DeadStoreElimination/int_sideeffect.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll similarity index 80% rename from llvm/test/Transforms/DeadStoreElimination/int_sideeffect.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll index 035e787f6bd7a..806c7362267d1 100644 --- a/llvm/test/Transforms/DeadStoreElimination/int_sideeffect.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -dse | FileCheck %s +; RUN: opt 
-S < %s -dse -enable-dse-memoryssa=false | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/invariant.start.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll index 27400cd4ed16c..5a0c1f05a2d37 100644 --- a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll @@ -1,5 +1,5 @@ ; Test to make sure llvm.invariant.start calls are not treated as clobbers. -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly diff --git a/llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll index dbbc9451edd09..815ad02d4fc0f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s ; CHECK-LABEL: void @skipBarrier(i8* %ptr) define void @skipBarrier(i8* %ptr) { @@ -62,4 +62,4 @@ define void @skip4Barriers(i8* %ptr) { declare i8* @llvm.launder.invariant.group.p0i8(i8*) -declare i8* @llvm.strip.invariant.group.p0i8(i8*) \ No newline at end of file +declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git 
a/llvm/test/Transforms/DeadStoreElimination/libcalls.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll similarity index 96% rename from llvm/test/Transforms/DeadStoreElimination/libcalls.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll index a18021d0c5c65..692a9a03ea824 100644 --- a/llvm/test/Transforms/DeadStoreElimination/libcalls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/libcalls2.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll similarity index 82% rename from llvm/test/Transforms/DeadStoreElimination/libcalls2.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll index df3011314e9eb..23440f18b052a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/libcalls2.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll similarity index 92% rename from llvm/test/Transforms/DeadStoreElimination/lifetime.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll index e9999ff0740f3..3f20d9e1dda2d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s target datalayout = 
"E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll similarity index 94% rename from llvm/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll index 4c6776b3758ef..b46e1392723cf 100644 --- a/llvm/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -dse -memdep-block-scan-limit=3 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -memdep-block-scan-limit=3 < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false -memdep-block-scan-limit=3 < %s | FileCheck %s +; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa=false -memdep-block-scan-limit=3 < %s | FileCheck %s ; Test case to check that the memory dependency analysis gets the same ; result even if we have a dbg value between the memcpy and diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll similarity index 96% rename from llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll index 68943d383ba6f..4a54d848b65c6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind declare void @llvm.memmove.p0i8.p0i8.i8(i8* 
nocapture, i8* nocapture, i8, i1) nounwind @@ -38,7 +38,6 @@ define void @test3() { ; CHECK-NEXT: ret void ; %A = alloca i8 - %B = alloca i8 store i8 0, i8* %A ;; Written to by memset @@ -87,7 +86,6 @@ define void @test6() { ; CHECK-NEXT: ret void ; %A = alloca i16, i16 1024, align 2 - %B = alloca i16, i16 1024, align 2 store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memset diff --git a/llvm/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll index c0f490c0d6c25..ea07f91492393 100644 --- a/llvm/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll @@ -1,7 +1,7 @@ ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. 
-; RUN: opt -S -march=native -dse < %s| FileCheck %s +; RUN: opt -S -march=native -dse -enable-dse-memoryssa=false < %s| FileCheck %s ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] ; CHECK: ![[DBG]] = !DILocation(line: 2, diff --git a/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll index 8d44855f2f97b..1d4f7b38b4c02 100644 --- a/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" define void @byte_by_byte_replacement(i32 *%ptr) { @@ -40,7 +40,6 @@ entry: %wptr = bitcast i64* %ptr to i16* %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 ;; We should be able to merge these two stores with the i64 one above diff --git a/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/merge-stores.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll index c7f86ab29e3e0..b3e16f8e5a8ec 100644 --- 
a/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" define void @byte_by_byte_replacement(i32 *%ptr) { @@ -39,7 +39,6 @@ entry: %wptr = bitcast i64* %ptr to i16* %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 ;; We should be able to merge these two stores with the i64 one above diff --git a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll similarity index 89% rename from llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll index aec3076678787..ae32017c6a459 100644 --- a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll similarity index 94% rename from llvm/test/Transforms/DeadStoreElimination/operand-bundles.ll rename to 
llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll index f3df74be031b7..18117b3def8df 100644 --- a/llvm/test/Transforms/DeadStoreElimination/operand-bundles.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare noalias i8* @malloc(i64) "malloc-like" diff --git a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll similarity index 95% rename from llvm/test/Transforms/DeadStoreElimination/pr11390.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll index 56ca604eff98b..ac806d6f170a0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s ; PR11390 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/simple.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll index 871190c584120..78f8a7c245254 100644 --- a/llvm/test/Transforms/DeadStoreElimination/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s 
-basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/tail-byval.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll similarity index 91% rename from llvm/test/Transforms/DeadStoreElimination/tail-byval.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll index ed2fbd434a75d..0b4a76fee55cd 100644 --- a/llvm/test/Transforms/DeadStoreElimination/tail-byval.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s ; Don't eliminate stores to allocas before tail calls to functions that use ; byval. It's correct to mark calls like these as 'tail'. 
To implement this tail diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll index 698d5d1e61c3e..18bc6c5922237 100644 --- a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll +++ b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse | FileCheck %s +; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes=early-cse-memssa | FileCheck %s diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll b/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll index 1a11fa17dd42d..16a5f07684c9a 100644 --- a/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll +++ b/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt -S -basic-aa -early-cse-memssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll index 0fae469341d04..9333edab051b0 100644 --- a/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll +++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -mtriple=amdgcn-- -early-cse | FileCheck %s +; RUN: opt < %s -S -mtriple=amdgcn-- -early-cse -earlycse-debug-hash | FileCheck %s ; CHECK-LABEL: @no_cse ; CHECK: call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %in, i32 0, i32 0) diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll 
b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll index 6b42ee8d71efc..8e618b5cfab00 100644 --- a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll +++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa -earlycse-debug-hash < %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; CHECK-LABEL: @memrealtime( diff --git a/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll b/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll index 95dd9d515cd93..9beb3b47c6a18 100644 --- a/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll +++ b/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt -S -basic-aa -early-cse-memssa < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll b/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll index 946293df6ca16..ac9e80d9c6e45 100644 --- a/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -verify-memoryssa -disable-output +; RUN: opt < %s -early-cse-memssa -earlycse-debug-hash -verify-memoryssa -disable-output ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/EarlyCSE/and_or.ll b/llvm/test/Transforms/EarlyCSE/and_or.ll index 28161ddfb3c6c..b70d0cea3f7fb 100644 --- a/llvm/test/Transforms/EarlyCSE/and_or.ll +++ b/llvm/test/Transforms/EarlyCSE/and_or.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s define i32 @test_01(i32 %a, i32 %b) { diff --git a/llvm/test/Transforms/EarlyCSE/atomics.ll b/llvm/test/Transforms/EarlyCSE/atomics.ll index 4284265d0aec0..4a4b76666344a 100644 --- a/llvm/test/Transforms/EarlyCSE/atomics.ll +++ b/llvm/test/Transforms/EarlyCSE/atomics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; CHECK-LABEL: @test12( diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll index f69a8168602ac..5178e5a89e205 100644 --- a/llvm/test/Transforms/EarlyCSE/basic.ll +++ b/llvm/test/Transforms/EarlyCSE/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes=early-cse | FileCheck %s diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index abecc3903a6f9..57c5a853a12ff 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -1029,6 +1029,49 @@ define i16 @umul_fix_scale(i16 %a, i16 %b, i32 %s) { ret i16 %o } +; TODO: handle >2 args + +define float @fma(float %a, float %b, float %c) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[X:%.*]] = call float @llvm.fma.f32(float [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.fma.f32(float [[B]], float [[A]], float [[C]]) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[X]], [[Y]] +; CHECK-NEXT: ret float [[R]] +; + %x = call float @llvm.fma.f32(float %a, float %b, float %c) + %y = call float @llvm.fma.f32(float %b, float %a, float 
%c) + %r = fdiv nnan float %x, %y + ret float %r +} + +define float @fma_different_add_ops(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: @fma_different_add_ops( +; CHECK-NEXT: [[X:%.*]] = call float @llvm.fma.f32(float [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.fma.f32(float [[B]], float [[A]], float [[D:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[X]], [[Y]] +; CHECK-NEXT: ret float [[R]] +; + %x = call float @llvm.fma.f32(float %a, float %b, float %c) + %y = call float @llvm.fma.f32(float %b, float %a, float %d) + %r = fdiv nnan float %x, %y + ret float %r +} + +; TODO: handle >2 args + +define <2 x double> @fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: @fmuladd( +; CHECK-NEXT: [[X:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[B]], <2 x double> [[A]], <2 x double> [[C]]) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan <2 x double> [[X]], [[Y]] +; CHECK-NEXT: ret <2 x double> [[R]] +; + %x = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + %y = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %a, <2 x double> %c) + %r = fdiv nnan <2 x double> %x, %y + ret <2 x double> %r +} + declare float @llvm.maxnum.f32(float, float) declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) @@ -1051,3 +1094,6 @@ declare i16 @llvm.smul.fix.i16(i16, i16, i32) declare i16 @llvm.umul.fix.i16(i16, i16, i32) declare <3 x i16> @llvm.smul.fix.sat.v3i16(<3 x i16>, <3 x i16>, i32) declare <3 x i16> @llvm.umul.fix.sat.v3i16(<3 x i16>, <3 x i16>, i32) + +declare float @llvm.fma.f32(float, float, float) +declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) diff --git 
a/llvm/test/Transforms/EarlyCSE/conditional.ll b/llvm/test/Transforms/EarlyCSE/conditional.ll index ff0acac5a357d..c4b3277633fe0 100644 --- a/llvm/test/Transforms/EarlyCSE/conditional.ll +++ b/llvm/test/Transforms/EarlyCSE/conditional.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; Can we CSE a known condition to a constant? diff --git a/llvm/test/Transforms/EarlyCSE/const-speculation.ll b/llvm/test/Transforms/EarlyCSE/const-speculation.ll index 5b7f2f5b69829..a531c14da770c 100644 --- a/llvm/test/Transforms/EarlyCSE/const-speculation.ll +++ b/llvm/test/Transforms/EarlyCSE/const-speculation.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S %s | FileCheck %s %mystruct = type { i32 } diff --git a/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll b/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll index b0fb8ff75ad39..2d6c5380394fb 100644 --- a/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll +++ b/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll @@ -1,4 +1,4 @@ -; RUN: opt -S %s -early-cse | FileCheck %s +; RUN: opt -S %s -early-cse -earlycse-debug-hash | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll b/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll index 35d0fd184968e..20a9805302742 100644 --- a/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll +++ b/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S %s -o - | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S %s -o - | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Function Attrs: nounwind uwtable diff --git a/llvm/test/Transforms/EarlyCSE/edge.ll b/llvm/test/Transforms/EarlyCSE/edge.ll index 88bd05d4ec348..bd82502c22290 100644 --- 
a/llvm/test/Transforms/EarlyCSE/edge.ll +++ b/llvm/test/Transforms/EarlyCSE/edge.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; Same as GVN/edge.ll, but updated to reflect EarlyCSE's less powerful ; implementation. EarlyCSE currently doesn't exploit equality comparisons diff --git a/llvm/test/Transforms/EarlyCSE/fence.ll b/llvm/test/Transforms/EarlyCSE/fence.ll index 0f53edc332bd3..8fb50849ff93a 100644 --- a/llvm/test/Transforms/EarlyCSE/fence.ll +++ b/llvm/test/Transforms/EarlyCSE/fence.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; NOTE: This file is testing the current implementation. Some of ; the transforms used as negative tests below would be legal, but diff --git a/llvm/test/Transforms/EarlyCSE/flags.ll b/llvm/test/Transforms/EarlyCSE/flags.ll index c3e74df624282..9d24ade7d3757 100644 --- a/llvm/test/Transforms/EarlyCSE/flags.ll +++ b/llvm/test/Transforms/EarlyCSE/flags.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll index a4c56bf0059ae..a4293f5eed9c1 100644 --- a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll +++ b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; Ensure we don't simplify away additions vectors of +0.0's (same as scalars). 
diff --git a/llvm/test/Transforms/EarlyCSE/gc_relocate.ll b/llvm/test/Transforms/EarlyCSE/gc_relocate.ll index 3ec7e129ef523..435b081a9a20b 100644 --- a/llvm/test/Transforms/EarlyCSE/gc_relocate.ll +++ b/llvm/test/Transforms/EarlyCSE/gc_relocate.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s declare void @func() declare i32 @"personality_function"() diff --git a/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll b/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll index 57dbdd8831902..6e423eb93bf77 100644 --- a/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -globals-aa -early-cse-memssa | FileCheck %s +; RUN: opt < %s -S -globals-aa -early-cse-memssa -earlycse-debug-hash | FileCheck %s define i16 @f1() readonly { ret i16 0 diff --git a/llvm/test/Transforms/EarlyCSE/guards.ll b/llvm/test/Transforms/EarlyCSE/guards.ll index 55ec46b186c6d..6a1bef9852061 100644 --- a/llvm/test/Transforms/EarlyCSE/guards.ll +++ b/llvm/test/Transforms/EarlyCSE/guards.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt < %s -S -basic-aa -early-cse-memssa --enable-knowledge-retention | FileCheck %s --check-prefixes=CHECK,USE_ASSUME diff --git a/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll b/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll index e9ba93a1852cb..2b281aa73e3c0 100644 --- 
a/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll +++ b/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; PR12231 diff --git a/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll b/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll index 1dccaab5e5f7d..f4d8fd25a63c3 100644 --- a/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll +++ b/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -early-cse | FileCheck %s +; RUN: opt -S < %s -early-cse -earlycse-debug-hash | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/EarlyCSE/invariant-loads.ll b/llvm/test/Transforms/EarlyCSE/invariant-loads.ll index c5246009b42b0..df76fd2d61bed 100644 --- a/llvm/test/Transforms/EarlyCSE/invariant-loads.ll +++ b/llvm/test/Transforms/EarlyCSE/invariant-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt -S -basic-aa -early-cse-memssa < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt -S -basic-aa -early-cse-memssa --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME diff --git a/llvm/test/Transforms/EarlyCSE/invariant.start.ll b/llvm/test/Transforms/EarlyCSE/invariant.start.ll index 2202c09c1a0e9..a0e267d85569c 100644 --- a/llvm/test/Transforms/EarlyCSE/invariant.start.ll +++ b/llvm/test/Transforms/EarlyCSE/invariant.start.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt < %s -S -early-cse 
-earlycse-debug-hash | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt < %s -S -early-cse --enable-knowledge-retention | FileCheck %s --check-prefixes=CHECK,USE_ASSUME ; RUN: opt < %s -S -passes=early-cse | FileCheck %s --check-prefixes=CHECK,NO_ASSUME diff --git a/llvm/test/Transforms/EarlyCSE/memoryssa.ll b/llvm/test/Transforms/EarlyCSE/memoryssa.ll index 5d33ed49bb1b4..3f670852a4d87 100644 --- a/llvm/test/Transforms/EarlyCSE/memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/memoryssa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefix=CHECK-NOMEMSSA +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s --check-prefix=CHECK-NOMEMSSA ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA ; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='early-cse-memssa' | FileCheck %s diff --git a/llvm/test/Transforms/EarlyCSE/phi.ll b/llvm/test/Transforms/EarlyCSE/phi.ll index 2c2972be51041..e9c86ec5d3740 100644 --- a/llvm/test/Transforms/EarlyCSE/phi.ll +++ b/llvm/test/Transforms/EarlyCSE/phi.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; Most basic case, fully identical PHI nodes diff --git a/llvm/test/Transforms/EarlyCSE/pr33406.ll b/llvm/test/Transforms/EarlyCSE/pr33406.ll index 4d3312e1f0ac2..903b8bc9f2ace 100644 --- a/llvm/test/Transforms/EarlyCSE/pr33406.ll +++ b/llvm/test/Transforms/EarlyCSE/pr33406.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse-memssa -S %s | FileCheck %s +; RUN: opt -early-cse-memssa -earlycse-debug-hash -S %s | FileCheck %s ; CHECK: define void @patatino() { ; CHECK: for.cond: diff --git a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll index 
47a513f2d6a67..d83a42780c647 100644 --- a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll +++ b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s declare void @readnone_may_unwind() readnone diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll index 0a3cd1c7401ca..b28af8535083c 100644 --- a/llvm/test/Transforms/EarlyCSE/writeonly.ll +++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s @var = global i32 undef declare void @foo() nounwind diff --git a/llvm/test/Transforms/GVN/commute.ll b/llvm/test/Transforms/GVN/commute.ll index 72506c0ece283..c76318db56a47 100644 --- a/llvm/test/Transforms/GVN/commute.ll +++ b/llvm/test/Transforms/GVN/commute.ll @@ -67,3 +67,45 @@ if.end: %umul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y) ret { i32, i1 } %umul } + +declare i16 @llvm.smul.fix.i16(i16, i16, i32) +declare i16 @llvm.umul.fix.i16(i16, i16, i32) + +define i16 @intrinsic_3_args(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 1) +; CHECK-NEXT: ret i16 0 +; + %m1 = call i16 @llvm.smul.fix.i16(i16 %x, i16 %y, i32 1) + %m2 = call i16 @llvm.smul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +; Negative test - 3rd arg is different + +define i16 @intrinsic_3_args_not_same(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args_not_same( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 2) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.umul.fix.i16(i16 %x, i16 %y, 
i32 2) + %m2 = call i16 @llvm.umul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fma(float %x, float %y) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[M1:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00) +; CHECK-NEXT: ret float 1.000000e+00 +; + %m1 = call float @llvm.fma.f32(float %x, float %y, float 1.0) + %m2 = call float @llvm.fma.f32(float %y, float %x, float 1.0) + %r = fdiv nnan float %m1, %m2 + ret float %r +} diff --git a/llvm/test/Transforms/GVN/preserve-memoryssa.ll b/llvm/test/Transforms/GVN/preserve-memoryssa.ll new file mode 100644 index 0000000000000..a815baaa3d008 --- /dev/null +++ b/llvm/test/Transforms/GVN/preserve-memoryssa.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -aa-pipeline=basic-aa -passes='require,gvn' -S -verify-memoryssa %s | FileCheck %s + +; REQUIRES: asserts + +declare void @use(i32) readnone + +define i32 @test(i32* %ptr.0, i32** %ptr.1, i1 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LV_0:%.*]] = load i32, i32* [[PTR_0:%.*]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN749:%.*]], label [[FOR_INC774:%.*]] +; CHECK: if.then749: +; CHECK-NEXT: [[LV_1:%.*]] = load i32*, i32** [[PTR_1:%.*]], align 8 +; CHECK-NEXT: store i32 10, i32* [[LV_1]], align 4 +; CHECK-NEXT: [[LV_2_PRE:%.*]] = load i32, i32* [[PTR_0]], align 8 +; CHECK-NEXT: br label [[FOR_INC774]] +; CHECK: for.inc774: +; CHECK-NEXT: [[LV_2:%.*]] = phi i32 [ [[LV_2_PRE]], [[IF_THEN749]] ], [ [[LV_0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @use(i32 [[LV_2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.end435 + +for.end435: + %lv.0 = load i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.0) + br label %if.end724 + +if.end724: + br i1 %c, label %if.then749, label %for.inc774 + +if.then749: + %lv.1 = load 
i32*, i32** %ptr.1, align 8 + %arrayidx772 = getelementptr inbounds i32, i32* %lv.1, i64 0 + store i32 10, i32* %arrayidx772, align 4 + br label %for.inc774 + +for.inc774: + br label %for.body830 + +for.body830: + %lv.2 = load i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.2) + br label %for.body.i22 + +for.body.i22: + ret i32 1 +} + +define i32 @test_volatile(i32* %ptr.0, i32** %ptr.1, i1 %c) { +; CHECK-LABEL: @test_volatile( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LV_0:%.*]] = load volatile i32, i32* [[PTR_0:%.*]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN749:%.*]], label [[FOR_INC774:%.*]] +; CHECK: if.then749: +; CHECK-NEXT: [[LV_1:%.*]] = load volatile i32*, i32** [[PTR_1:%.*]], align 8 +; CHECK-NEXT: store i32 10, i32* [[LV_1]], align 4 +; CHECK-NEXT: br label [[FOR_INC774]] +; CHECK: for.inc774: +; CHECK-NEXT: [[LV_2:%.*]] = load volatile i32, i32* [[PTR_0]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.end435 + +for.end435: + %lv.0 = load volatile i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.0) + br label %if.end724 + +if.end724: + br i1 %c, label %if.then749, label %for.inc774 + +if.then749: + %lv.1 = load volatile i32*, i32** %ptr.1, align 8 + %arrayidx772 = getelementptr inbounds i32, i32* %lv.1, i64 0 + store i32 10, i32* %arrayidx772, align 4 + br label %for.inc774 + +for.inc774: + br label %for.body830 + +for.body830: + %lv.2 = load volatile i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.2) + br label %for.body.i22 + +for.body.i22: + ret i32 1 +} diff --git a/llvm/test/Transforms/GVNHoist/pr37445.ll b/llvm/test/Transforms/GVNHoist/pr37445.ll index 817fea14077d0..82cdced2c6129 100644 --- a/llvm/test/Transforms/GVNHoist/pr37445.ll +++ b/llvm/test/Transforms/GVNHoist/pr37445.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -gvn-hoist -S | FileCheck %s +; RUN: opt < %s -early-cse-memssa -earlycse-debug-hash -gvn-hoist -S | 
FileCheck %s ; Make sure opt won't crash and that this pair of ; instructions (load, icmp) is hoisted successfully diff --git a/llvm/test/Transforms/GVNHoist/pr38807.ll b/llvm/test/Transforms/GVNHoist/pr38807.ll index f8c7f7e636379..0fcfd2180c681 100644 --- a/llvm/test/Transforms/GVNHoist/pr38807.ll +++ b/llvm/test/Transforms/GVNHoist/pr38807.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -gvn-hoist -S | FileCheck %s +; RUN: opt < %s -early-cse-memssa -earlycse-debug-hash -gvn-hoist -S | FileCheck %s ; Make sure opt doesn't crash. On top of that, the instructions ; of the side blocks should be hoisted to the entry block. diff --git a/llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll b/llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll new file mode 100644 index 0000000000000..f52ba05e6c19b --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll @@ -0,0 +1,27 @@ +; RUN: opt -globalopt < %s -S -o - | FileCheck %s + +; When simplifying users of a global variable, the pass could incorrectly +; return false if there were still some uses left, and no further optimizations +; was done. This was caught by the pass return status check that is hidden +; under EXPENSIVE_CHECKS. 
+ +@GV1 = internal unnamed_addr global i64 1, align 8 + +; CHECK: @GV1 = internal unnamed_addr global i64 1, align 8 + +define void @test1() local_unnamed_addr { +; CHECK-LABEL: @test1 +; CHECK-NEXT: %val = load atomic i8 +; CHECK-NEXT: ret void + + %val = load atomic i8, i8* bitcast (i64* @GV1 to i8*) acquire, align 8 + ret void +} + +define i64 @test2() local_unnamed_addr { +; CHECK-LABEL: @test2 +; CHECK-NEXT: ret i64 1 + + %val = load atomic i64, i64* @GV1 acquire, align 8 + ret i64 %val +} diff --git a/llvm/test/Transforms/GlobalOpt/const-return-status.ll b/llvm/test/Transforms/GlobalOpt/const-return-status.ll new file mode 100644 index 0000000000000..32c4eb895dc1a --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/const-return-status.ll @@ -0,0 +1,28 @@ +; RUN: opt -globalopt < %s -S -o - | FileCheck %s + +; When simplifying users of a global variable, the pass could incorrectly +; return false if there were still some uses left, and no further optimizations +; was done. This was caught by the pass return status check that is hidden +; under EXPENSIVE_CHECKS. 
+ +; CHECK: @src = internal unnamed_addr constant + +; CHECK: entry: +; CHECK-NEXT: %call = call i32 @f(i32 0) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (i32* @dst to i8*), i8* align 4 bitcast ([1 x i32]* @src to i8*), i64 1, i1 false) +; CHECK-NEXT: ret void + +@src = internal unnamed_addr global [1 x i32] zeroinitializer, align 4 +@dst = external dso_local local_unnamed_addr global i32, align 4 + +define dso_local void @d() local_unnamed_addr { +entry: + %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @src, i64 0, i64 0), align 4 + %call = call i32 @f(i32 %0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (i32* @dst to i8*), i8* align 4 bitcast ([1 x i32]* @src to i8*), i64 1, i1 false) + ret void +} + +declare dso_local i32 @f(i32) local_unnamed_addr + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll new file mode 100644 index 0000000000000..988b3923263f6 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -0,0 +1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -indvars -S < %s | FileCheck %s +; RUN: opt -passes=indvars -S < %s | FileCheck %s + +; Monotonic decrementing iv. 
we should be able to prove that %iv.next @llvm.abs.v4i32(<4 x i32>, i1) +declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) +declare void @llvm.assume(i1) ; abs preserves trailing zeros so the second and is unneeded define i32 @abs_trailing_zeros(i32 %x) { @@ -126,3 +128,129 @@ define <4 x i32> @abs_of_select_neg_false_val(<4 x i1> %b, <4 x i32> %x) { %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sel, i1 false) ret <4 x i32> %abs } + +define i32 @abs_dom_cond_nopoison(i32 %x) { +; CHECK-LABEL: @abs_dom_cond_nopoison( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i32 [[X]] +; CHECK: false: +; CHECK-NEXT: [[A2:%.*]] = sub i32 0, [[X]] +; CHECK-NEXT: ret i32 [[A2]] +; + %cmp = icmp sge i32 %x, 0 + br i1 %cmp, label %true, label %false + +true: + %a1 = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %a1 + +false: + %a2 = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %a2 +} + +define i32 @abs_dom_cond_poison(i32 %x) { +; CHECK-LABEL: @abs_dom_cond_poison( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i32 [[X]] +; CHECK: false: +; CHECK-NEXT: [[A2:%.*]] = sub nsw i32 0, [[X]] +; CHECK-NEXT: ret i32 [[A2]] +; + %cmp = icmp sge i32 %x, 0 + br i1 %cmp, label %true, label %false + +true: + %a1 = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %a1 + +false: + %a2 = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %a2 +} + +; Abs argument non-neg based on known bits. 
+ +define i32 @zext_abs(i31 %x) { +; CHECK-LABEL: @zext_abs( +; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: ret i32 [[ZEXT]] +; + %zext = zext i31 %x to i32 + %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) + ret i32 %abs +} + +define <3 x i82> @lshr_abs(<3 x i82> %x) { +; CHECK-LABEL: @lshr_abs( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: ret <3 x i82> [[LSHR]] +; + %lshr = lshr <3 x i82> %x, + %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) + ret <3 x i82> %abs +} + +define i32 @and_abs(i32 %x) { +; CHECK-LABEL: @and_abs( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 +; CHECK-NEXT: ret i32 [[AND]] +; + %and = and i32 %x, 2147483644 + %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) + ret i32 %abs +} + +define <3 x i82> @select_abs(<3 x i1> %cond) { +; CHECK-LABEL: @select_abs( +; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> +; CHECK-NEXT: ret <3 x i82> [[SEL]] +; + %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> + %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) + ret <3 x i82> %abs +} + +define i32 @assume_abs(i32 %x) { +; CHECK-LABEL: @assume_abs( +; CHECK-NEXT: [[ASSUME:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) +; CHECK-NEXT: ret i32 [[X]] +; + %assume = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %assume) + %abs = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %abs +} + +; Abs argument negative based on known bits. 
+ +define i32 @abs_assume_neg(i32 %x) { +; CHECK-LABEL: @abs_assume_neg( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %cmp = icmp slt i32 %x, 0 + call void @llvm.assume(i1 %cmp) + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %abs +} + +define i32 @abs_known_neg(i16 %x) { +; CHECK-LABEL: @abs_known_neg( +; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[EXT]], -1 +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[NEG]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %ext = zext i16 %x to i32 + %neg = sub nsw i32 -1, %ext + %abs = call i32 @llvm.abs.i32(i32 %neg, i1 false) + ret i32 %abs +} diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 6f33e83ee3362..8ca24caa2aa1b 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -1,66 +1,56 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S -instcombine-infinite-loop-threshold=2 | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +declare void @llvm.assume(i1) #1 + +; Check that the alignment has been upgraded and that the assume has not +; been removed: + define i32 @foo1(i32* %a) #0 { ; CHECK-LABEL: @foo1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 32 +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[A:%.*]], align 32 ; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 ; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 ; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: ret i32 [[TMP0]] 
+; CHECK-NEXT: ret i32 [[T0]] ; -entry: - %0 = load i32, i32* %a, align 4 - -; Check that the alignment has been upgraded and that the assume has not -; been removed: - + %t0 = load i32, i32* %a, align 4 %ptrint = ptrtoint i32* %a to i64 %maskedptr = and i64 %ptrint, 31 %maskcond = icmp eq i64 %maskedptr, 0 tail call void @llvm.assume(i1 %maskcond) - - ret i32 %0 + ret i32 %t0 } +; Same check as in @foo1, but make sure it works if the assume is first too. + define i32 @foo2(i32* %a) #0 { ; CHECK-LABEL: @foo2( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A:%.*]] to i64 ; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 ; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 32 -; CHECK-NEXT: ret i32 [[TMP0]] +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 32 +; CHECK-NEXT: ret i32 [[T0]] ; -entry: -; Same check as in @foo1, but make sure it works if the assume is first too. 
- %ptrint = ptrtoint i32* %a to i64 %maskedptr = and i64 %ptrint, 31 %maskcond = icmp eq i64 %maskedptr, 0 tail call void @llvm.assume(i1 %maskcond) - - %0 = load i32, i32* %a, align 4 - ret i32 %0 + %t0 = load i32, i32* %a, align 4 + ret i32 %t0 } -declare void @llvm.assume(i1) #1 - define i32 @simple(i32 %a) #1 { ; CHECK-LABEL: @simple( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 4 ; -entry: - - %cmp = icmp eq i32 %a, 4 tail call void @llvm.assume(i1 %cmp) ret i32 %a @@ -68,72 +58,55 @@ entry: define i32 @can1(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @can1( -; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.assume(i1 [[A:%.*]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[B:%.*]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[C:%.*]]) ; CHECK-NEXT: ret i32 5 ; -entry: %and1 = and i1 %a, %b %and = and i1 %and1, %c tail call void @llvm.assume(i1 %and) - - ret i32 5 } define i32 @can2(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @can2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], true ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) ; CHECK-NEXT: ret i32 5 ; -entry: %v = or i1 %a, %b %w = xor i1 %v, 1 tail call void @llvm.assume(i1 %w) - - ret i32 5 } define i32 @bar1(i32 %a) #0 { ; CHECK-LABEL: @bar1( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 1 ; -entry: %and1 = and i32 %a, 3 - - %and = and i32 %a, 7 %cmp = icmp eq i32 %and, 1 tail call void @llvm.assume(i1 %cmp) - ret i32 %and1 } define i32 @bar2(i32 %a) #0 { ; CHECK-LABEL: @bar2( -; 
CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 1 ; -entry: - %and = and i32 %a, 7 %cmp = icmp eq i32 %and, 1 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 3 ret i32 %and1 } @@ -176,15 +149,11 @@ define i32 @bar4(i32 %a, i32 %b) { ; entry: %and1 = and i32 %b, 3 - - %and = and i32 %a, 7 %cmp = icmp eq i32 %and, 1 tail call void @llvm.assume(i1 %cmp) - %cmp2 = icmp eq i32 %a, %b tail call void @llvm.assume(i1 %cmp2) - ret i32 %and1 } @@ -377,10 +346,10 @@ define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) { define void @debug_interference(i8 %x) { ; CHECK-LABEL: @debug_interference( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7:metadata !.*]], metadata !DIExpression()), [[DBG9:!dbg !.*]] ; CHECK-NEXT: tail call void @llvm.assume(i1 false) -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]] +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]]) ; CHECK-NEXT: ret void ; @@ -532,7 +501,6 @@ define void @always_true_assumption() { ; call void @llvm.assume(i1 true) ret void - } ; The alloca guarantees that the low bits of %a are zero because of alignment. 
@@ -575,7 +543,6 @@ define i8 @conflicting_assumptions(i8 %x){ define void @PR36270(i32 %b) { ; CHECK-LABEL: @PR36270( -; CHECK-NEXT: tail call void @llvm.assume(i1 false) ; CHECK-NEXT: unreachable ; %B7 = xor i32 -1, 2147483647 @@ -588,6 +555,85 @@ define void @PR36270(i32 %b) { unreachable } +; PR47416 + +define i32 @unreachable_assume(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_assume( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = or i1 %cmp0, %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + unreachable +} + +define i32 @unreachable_assumes_and_store(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @unreachable_assumes_and_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = or i1 %cmp0, %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + %cmp5 = icmp ugt i32 %y, 42 + tail call void @llvm.assume(i1 %cmp5) + store i32 %x, i32* %p + unreachable +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git a/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll b/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll new file mode 100644 index 0000000000000..3622904fa07d7 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +define i35 @smax(i35 %x) { +; CHECK-LABEL: @smax( +; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smax.i35(i35 [[X:%.*]], i35 42) +; CHECK-NEXT: ret i35 [[R]] +; + %r = call i35 @llvm.smax.i35(i35 42, i35 %x) + ret i35 %r +} + +define i5 @smin(i5 %x) { +; CHECK-LABEL: @smin( +; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.smin.i5(i5 [[X:%.*]], i5 10) +; CHECK-NEXT: ret i5 [[R]] +; + %r = call i5 @llvm.smin.i5(i5 42, i5 %x) + ret i5 %r +} + +define <2 x i35> @umax(<2 x i35> %x) { +; CHECK-LABEL: @umax( +; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.umax.v2i35(<2 x i35> [[X:%.*]], <2 x i35> ) +; CHECK-NEXT: ret <2 x i35> [[R]] +; + %r = call <2 x i35> @llvm.umax.v2i35(<2 x i35> , <2 x i35> %x) + ret <2 x i35> %r +} + +define <3 x i35> @umin(<3 x i35> %x) { +; CHECK-LABEL: @umin( +; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umin.v3i35(<3 x i35> [[X:%.*]], <3 x i35> ) +; 
CHECK-NEXT: ret <3 x i35> [[R]] +; + %r = call <3 x i35> @llvm.umin.v3i35(<3 x i35> , <3 x i35> %x) + ret <3 x i35> %r +} + +define i35 @smul_fix(i35 %x) { +; CHECK-LABEL: @smul_fix( +; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smul.fix.i35(i35 [[X:%.*]], i35 42, i32 2) +; CHECK-NEXT: ret i35 [[R]] +; + %r = call i35 @llvm.smul.fix.i35(i35 42, i35 %x, i32 2) + ret i35 %r +} + +define i5 @umul_fix(i5 %x) { +; CHECK-LABEL: @umul_fix( +; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.umul.fix.i5(i5 [[X:%.*]], i5 10, i32 3) +; CHECK-NEXT: ret i5 [[R]] +; + %r = call i5 @llvm.umul.fix.i5(i5 42, i5 %x, i32 3) + ret i5 %r +} + +define <2 x i35> @smul_fix_sat(<2 x i35> %x) { +; CHECK-LABEL: @smul_fix_sat( +; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> [[X:%.*]], <2 x i35> , i32 4) +; CHECK-NEXT: ret <2 x i35> [[R]] +; + %r = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> , <2 x i35> %x, i32 4) + ret <2 x i35> %r +} + +define <3 x i35> @umul_fix_sat(<3 x i35> %x) { +; CHECK-LABEL: @umul_fix_sat( +; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> [[X:%.*]], <3 x i35> , i32 5) +; CHECK-NEXT: ret <3 x i35> [[R]] +; + %r = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> , <3 x i35> %x, i32 5) + ret <3 x i35> %r +} + +declare i35 @llvm.smax.i35(i35, i35) +declare i5 @llvm.smin.i5(i5, i5) +declare <2 x i35> @llvm.umax.v2i35(<2 x i35>, <2 x i35>) +declare <3 x i35> @llvm.umin.v3i35(<3 x i35>, <3 x i35>) +declare i35 @llvm.smul.fix.i35(i35, i35, i32) +declare i5 @llvm.umul.fix.i5(i5, i5, i32) +declare <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35>, <2 x i35>, i32) +declare <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35>, <3 x i35>, i32) diff --git a/llvm/test/Transforms/InstCombine/cttz-abs.ll b/llvm/test/Transforms/InstCombine/cttz-abs.ll index 3faa8665376cc..b89a55c8f5b87 100644 --- a/llvm/test/Transforms/InstCombine/cttz-abs.ll +++ b/llvm/test/Transforms/InstCombine/cttz-abs.ll @@ -3,7 +3,7 @@ define i32 @cttz_abs(i32 %x) { ; 
CHECK-LABEL: @cttz_abs( -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), [[RNG0:!range !.*]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 0 @@ -29,7 +29,7 @@ define i32 @cttz_abs2(i32 %x) { ; CHECK-LABEL: @cttz_abs2( ; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @use_cond(i1 [[C]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp sgt i32 %x, 0 @@ -44,7 +44,7 @@ define i32 @cttz_abs3(i32 %x) { ; CHECK-LABEL: @cttz_abs3( ; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: call void @use_cond(i1 [[C]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp sgt i32 %x, -1 @@ -57,7 +57,7 @@ define i32 @cttz_abs3(i32 %x) { define i32 @cttz_abs4(i32 %x) { ; CHECK-LABEL: @cttz_abs4( -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 1 @@ -69,7 +69,7 @@ define i32 @cttz_abs4(i32 %x) { define i32 @cttz_nabs(i32 %x) { ; CHECK-LABEL: @cttz_nabs( -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 0 @@ -93,7 +93,7 @@ define <2 x i64> @cttz_nabs_vec(<2 x i64> %x) { define i64 @cttz_abs_64(i64 %x) { ; CHECK-LABEL: @cttz_abs_64( -; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range !1 +; CHECK-NEXT: 
[[R:%.*]] = call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), [[RNG1:!range !.*]] ; CHECK-NEXT: ret i64 [[R]] ; %c = icmp slt i64 %x, 0 @@ -109,7 +109,7 @@ define i32 @cttz_abs_multiuse(i32 %x) { ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[S]], i32 [[X]] ; CHECK-NEXT: call void @use_abs(i32 [[D]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 1 @@ -126,7 +126,7 @@ define i32 @cttz_nabs_multiuse(i32 %x) { ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[S]] ; CHECK-NEXT: call void @use_abs(i32 [[D]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 1 @@ -144,7 +144,7 @@ define i32 @no_cttz_abs(i32 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], 2 ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[S]], i32 [[X]] -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 2 @@ -159,7 +159,7 @@ define i32 @no_cttz_abs2(i32 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: [[S:%.*]] = sub i32 1, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[S]], i32 [[X]] -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 0 @@ -175,7 +175,7 @@ define i32 @no_cttz_abs3(i32 %x) { ; CHECK-NEXT: call void @use_cond(i1 [[C]]) ; 
CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[S]] -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp sgt i32 %x, -2 @@ -216,9 +216,30 @@ define <2 x i64> @no_cttz_nabs_vec(<2 x i64> %x) { ret <2 x i64> %r } +define i32 @cttz_abs_intrin(i32 %x) { +; CHECK-LABEL: @cttz_abs_intrin( +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), [[RNG0]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 false) + %r = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %r +} + +define i32 @cttz_nabs_intrin(i32 %x) { +; CHECK-LABEL: @cttz_nabs_intrin( +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), [[RNG0]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 false) + %n = sub i32 0, %a + %r = call i32 @llvm.cttz.i32(i32 %n, i1 false) + ret i32 %r +} declare void @use_cond(i1) declare void @use_abs(i32) declare i32 @llvm.cttz.i32(i32, i1) declare i64 @llvm.cttz.i64(i64) declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>) +declare i32 @llvm.abs.i32(i32, i1) diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll index de030bb59c568..e77a828729e1d 100644 --- a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll @@ -103,7 +103,7 @@ define double @rsqrt_x_reassociate_extra_use(double %x, double * %p) { ; CHECK-LABEL: @rsqrt_x_reassociate_extra_use( ; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECK-NEXT: [[RSQRT:%.*]] = fdiv double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: [[RES:%.*]] = fmul reassoc nsz double [[RSQRT]], [[X]] +; CHECK-NEXT: [[RES:%.*]] = fdiv reassoc nsz double [[X:%.*]], [[SQRT]] ; CHECK-NEXT: store double [[RSQRT]], double* [[P:%.*]], align 8 
; CHECK-NEXT: ret double [[RES]] ; @@ -119,7 +119,7 @@ define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x flo ; CHECK-NEXT: [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]]) ; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> , [[SQRT]] -; CHECK-NEXT: [[RES:%.*]] = fmul fast <2 x float> [[ADD]], [[RSQRT]] +; CHECK-NEXT: [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]] ; CHECK-NEXT: store <2 x float> [[RSQRT]], <2 x float>* [[P:%.*]], align 8 ; CHECK-NEXT: ret <2 x float> [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index 8e7d9056726e4..e2aff1c304adf 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -365,3 +365,306 @@ define i1 @ne_rem_zero_nonuw(i8 %x) { %b = icmp ne i8 %a, 30 ret i1 %b } + +define i1 @mul_constant_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 5 + %B = mul i32 %y, 5 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, + %B = mul <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: 
@mul_constant_eq_extra_use2( +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 6 + %B = mul nsw i32 %y, 6 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nsw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nsw <2 x i32> %x, + %B = mul nsw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 74 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 74 + call void @use(i8 %A) + %B = mul nsw i8 %y, 74 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw_extra_use2( +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 20 +; CHECK-NEXT: call void @use(i8 
[[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 20 + %B = mul nsw i8 %y, 20 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 24 + call void @use(i8 %A) + %B = mul nsw i8 %y, 24 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_eq( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 22 + %B = mul nuw i32 %y, 22 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nuw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nuw <2 x i32> %x, + %B = mul nuw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 6 + call void @use(i8 %A) + %B = mul nuw i8 %y, 6 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nuw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nuw_extra_use2( +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 36 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: 
ret i1 [[C]] +; + %A = mul nuw i8 %x, 36 + %B = mul nuw i8 %y, 36 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 38 + call void @use(i8 %A) + %B = mul nuw i8 %y, 38 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +; Negative test - wrong pred + +define i1 @mul_constant_ult(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_ult( +; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 47 +; CHECK-NEXT: [[B:%.*]] = mul i32 [[Y:%.*]], 47 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 47 + %B = mul i32 %y, 47 + %C = icmp ult i32 %A, %B + ret i1 %C +} + +; Negative test - wrong pred + +define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_sgt( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 46 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 46 + %C = icmp sgt i32 %A, %B + ret i1 %C +} + +; Negative test - wrong constants + +define i1 @mul_mismatch_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_mismatch_constant_nuw_eq( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 44 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +; If the multiply constant has any trailing zero bits but could overflow, +; we get something completely different. 
+; We mask off the high bits of each input and then convert: +; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + +define i1 @mul_constant_partial_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_partial_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 44 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @mul_constant_mismatch_wrap_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_mismatch_wrap_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 54 + %B = mul nuw i32 %y, 54 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 12 + %B = mul i32 %y, 12 + %C = icmp ne i32 %A, %B + ret i1 %C +} + +define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, + %B = mul <2 x i32> %y, + %C = icmp eq <2 x i32> %A, %B + ret <2 x i1> %C +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 3b223d71f9d48..683518121789c 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3,6 +3,8 @@ target 
datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +declare i8 @llvm.abs.i8(i8, i1) + define i32 @test1(i32 %X) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 [[X:%.*]], 31 @@ -1090,6 +1092,26 @@ define zeroext i1 @cmpabs2(i64 %val) { ret i1 %tobool } +define i1 @abs_intrin_eq_zero(i8 %x) { +; CHECK-LABEL: @abs_intrin_eq_zero( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp eq i8 %abs, 0 + ret i1 %cmp +} + +define i1 @abs_intrin_ne_zero(i8 %x) { +; CHECK-LABEL: @abs_intrin_ne_zero( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp ne i8 %abs, 0 + ret i1 %cmp +} + define void @test58() { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 36029346783166592) @@ -3375,58 +3397,6 @@ define i1 @eq_add_constants(i32 %x, i32 %y) { ret i1 %C } -define i1 @eq_mul_constants(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 5 - %B = mul i32 %y, 5 - %C = icmp eq i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_splat( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, - %B = mul <2 x i32> %y, - %C = icmp ne <2 x i32> %A, %B - ret <2 x i1> %C -} - -; If the multiply constant has any trailing zero bits, we get something completely different. 
-; We mask off the high bits of each input and then convert: -; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - -define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 12 - %B = mul i32 %y, 12 - %C = icmp ne i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, - %B = mul <2 x i32> %y, - %C = icmp eq <2 x i32> %A, %B - ret <2 x i1> %C -} - declare i32 @llvm.bswap.i32(i32) define i1 @bswap_ne(i32 %x, i32 %y) { diff --git a/llvm/test/Transforms/InstCombine/invariant.group.ll b/llvm/test/Transforms/InstCombine/invariant.group.ll index f3774ad6c14d2..7a33bfd090773 100644 --- a/llvm/test/Transforms/InstCombine/invariant.group.ll +++ b/llvm/test/Transforms/InstCombine/invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt -instcombine -early-cse -S < %s | FileCheck %s +; RUN: opt -instcombine -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; CHECK-LABEL: define i8* @simplifyNullLaunder() diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 2e9250fe94c41..9959841f813ac 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare i32 @llvm.abs.i32(i32, i1) + define i32 @pow2_multiplier(i32 %A) { ; CHECK-LABEL: @pow2_multiplier( ; CHECK-NEXT: [[B:%.*]] = 
shl i32 [[A:%.*]], 1 @@ -858,6 +860,27 @@ define <4 x i32> @combine_mul_nabs_v4i32(<4 x i32> %0) { ret <4 x i32> %m } +define i32 @combine_mul_abs_intrin(i32 %x) { +; CHECK-LABEL: @combine_mul_abs_intrin( +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], [[X]] +; CHECK-NEXT: ret i32 [[MUL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %mul = mul i32 %abs, %abs + ret i32 %mul +} + +define i32 @combine_mul_nabs_intrin(i32 %x) { +; CHECK-LABEL: @combine_mul_nabs_intrin( +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], [[X]] +; CHECK-NEXT: ret i32 [[MUL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %mul = mul i32 %neg, %neg + ret i32 %mul +} + ; z * splat(0) = splat(0), even for scalable vectors define @mul_scalable_splat_zero( %z) { ; CHECK-LABEL: @mul_scalable_splat_zero( diff --git a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll index 35f0e5caadc6e..d533703ac6fbe 100644 --- a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll +++ b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll @@ -13,41 +13,39 @@ target datalayout = "p:16:16" define void @f(i1 %cond) { ; CHECK-LABEL: @f( ; CHECK-NEXT: bb0: -; CHECK-NEXT: [[TMP12:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[TMP12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP12]], i16 0, i16 0 +; CHECK-NEXT: [[T12:%.*]] = alloca [2 x i32], align 8 +; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint [2 x i32]* [[TMP12]] to i16 -; CHECK-NEXT: store i16 [[TMP8]], i16* @a, align 2 ; CHECK-NEXT: unreachable ; CHECK: bb2: -; CHECK-NEXT: [[TMP9:%.*]] = load i16*, i16** @b, align 2 -; CHECK-NEXT: store i16 0, i16* [[TMP9]], align 2 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP12_SUB]], align 8 -; CHECK-NEXT: 
[[TMP11:%.*]] = add i32 [[TMP10]], -1 -; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP12_SUB]], align 8 +; CHECK-NEXT: [[T9:%.*]] = load i16*, i16** @b, align 2 +; CHECK-NEXT: store i16 0, i16* [[T9]], align 2 +; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[T12_SUB]], align 8 +; CHECK-NEXT: [[T11:%.*]] = add i32 [[T10]], -1 +; CHECK-NEXT: store i32 [[T11]], i32* [[T12_SUB]], align 8 ; CHECK-NEXT: ret void ; bb0: - %tmp1 = alloca %i64_t - %tmp2 = bitcast %i64_t* %tmp1 to i32* - %useless3 = bitcast %i64_t* %tmp1 to i16* + %t1 = alloca %i64_t + %t2 = bitcast %i64_t* %t1 to i32* + %useless3 = bitcast %i64_t* %t1 to i16* %useless4 = getelementptr inbounds i16, i16* %useless3, i16 undef %useless5 = bitcast i16* %useless4 to i32* br i1 %cond, label %bb1, label %bb2 bb1: ; preds = %bb0 - %useless6 = insertvalue [1 x i32*] undef, i32* %tmp2, 0 + %useless6 = insertvalue [1 x i32*] undef, i32* %t2, 0 %useless7 = insertvalue [1 x i32*] %useless6, i32* null, 0 - %tmp8 = ptrtoint i32* %tmp2 to i16 - store i16 %tmp8, i16* @a + %t8 = ptrtoint i32* %t2 to i16 + store i16 %t8, i16* @a unreachable bb2: ; preds = %bb0 - %tmp9 = load i16*, i16** @b - store i16 0, i16* %tmp9 - %tmp10 = load i32, i32* %tmp2 - %tmp11 = sub i32 %tmp10, 1 - store i32 %tmp11, i32* %tmp2 + %t9 = load i16*, i16** @b + store i16 0, i16* %t9 + %t10 = load i32, i32* %t2 + %t11 = sub i32 %t10, 1 + store i32 %t11, i32* %t2 ret void } diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index cf9604223f6c1..f31eeb46d8823 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -14,6 +14,32 @@ define i64 @test_inbounds([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_partial_inbounds1([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds1( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 
0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_partial_inbounds2([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds2( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + define i64 @test_inbounds_nuw([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds_nuw( ; CHECK-NEXT: [[P2_IDX:%.*]] = shl nuw nsw i64 [[IDX:%.*]], 2 @@ -69,13 +95,39 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_inbounds1_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds1_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds2_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + ; The sub and shl here could be nuw, but this is harder to handle. 
define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 ; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 -; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[P1_IDX_NEG]], [[P2_IDX]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index d2e566be34110..437d8f8c5c023 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -503,12 +503,11 @@ define i64 @test24b(i8* %P, i64 %A){ ret i64 %G } - define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A %C = ptrtoint i16* %B to i64 @@ -521,9 +520,9 @@ define i64 @test25(i8* %P, i64 %A){ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i16 [[DOTNEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] 
addrspace(1)* @Arr_as1, i64 0, i64 %A %C = ptrtoint i16 addrspace(1)* %B to i16 @@ -826,8 +825,8 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) { define i64 @test29(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test29( -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j @@ -839,9 +838,9 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i64 [[I:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -854,9 +853,9 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i16 [[I:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[DOTNEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1235,10 +1234,10 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; "%sub = i64 %i, %j, ret i64 %sub" ; gep1 and gep2 have only one use ; CHECK-LABEL: @test58( -; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 -; 
CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j diff --git a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll index 8dcb167408698..0b148172a42ae 100644 --- a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll +++ b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll @@ -146,3 +146,25 @@ define @extractelement_insertelement_diff_positions( %3, i32 %vec.e3, i32 3 ret %4 } + +define i32 @bitcast_of_extractelement( %d) { +; CHECK-LABEL: @bitcast_of_extractelement( +; CHECK-NEXT: [[BC:%.*]] = bitcast [[D:%.*]] to +; CHECK-NEXT: [[CAST:%.*]] = extractelement [[BC]], i32 0 +; CHECK-NEXT: ret i32 [[CAST]] +; + %ext = extractelement %d, i32 0 + %cast = bitcast float %ext to i32 + ret i32 %cast +} + +define i1 @extractelement_is_zero( %d, i1 %b, i32 %z) { +; CHECK-LABEL: @extractelement_is_zero( +; CHECK-NEXT: [[EXT:%.*]] = extractelement [[D:%.*]], i32 0 +; CHECK-NEXT: [[BB:%.*]] = icmp eq i32 [[EXT]], 0 +; CHECK-NEXT: ret i1 [[BB]] +; + %ext = extractelement %d, i32 0 + %bb = icmp eq i32 %ext, 0 + ret i1 %bb +} diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll b/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll index 1315b3628475e..2dc381d0949bd 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S -o - %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S -o - %s | FileCheck %s target datalayout = 
"e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64-ni:2" target triple = "armv7-unknown-linux-gnueabi" diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll b/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll index b57397381b647..b16910b947405 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll @@ -1,195 +1,195 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S -o - %s | FileCheck %s - -declare double @acos(double) -define double @f_acos() { -; CHECK-LABEL: @f_acos( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @acos(double 1.0) - ret double %res -} - -declare float @asinf(float) -define float @f_asinf() { -; CHECK-LABEL: @f_asinf( -; CHECK-NEXT: ret float 0x3FF921FB{{.+}} -; - %res = tail call fast float @asinf(float 1.0) - ret float %res -} - -declare double @atan(double) -define double @f_atan() { -; CHECK-LABEL: @f_atan( -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan(double 1.000000e+00) -; CHECK-NEXT: ret double 0x3FE921FB -; - %res = tail call fast double @atan(double 1.0) - ret double %res -} - -declare float @cosf(float) -define float @f_cosf() { -; CHECK-LABEL: @f_cosf( -; CHECK-NEXT: ret float 0x3FE14A2{{.+}} -; - %res = tail call fast float @cosf(float 1.0) - ret float %res -} - -declare float @llvm.cos.f32(float) -define float @i_cosf() { -; CHECK-LABEL: @i_cosf( -; CHECK-NEXT: ret float 0x3FE14A2 -; - %res = tail call fast float @llvm.cos.f32(float 1.0) - ret float %res -} - -declare double @cosh(double) -define double @f_cosh() { -; CHECK-LABEL: @f_cosh( -; CHECK-NEXT: ret double 0x3FF8B075{{.+}} -; - %res = tail call fast double @cosh(double 1.0) - ret double %res -} - -declare float @expf(float) -define float @f_expf() { -; CHECK-LABEL: @f_expf( -; CHECK-NEXT: ret float 0x4005BF0A{{.+}} -; - %res = tail call fast float @expf(float 1.0) - ret float %res -} - -declare 
float @llvm.exp.f32(float) -define float @i_expf() { -; CHECK-LABEL: @i_expf( -; CHECK-NEXT: ret float 0x4005BF0A{{.+}} -; - %res = tail call fast float @llvm.exp.f32(float 1.0) - ret float %res -} - -declare double @exp2(double) -define double @f_exp2() { -; CHECK-LABEL: @f_exp2( -; CHECK-NEXT: ret double 2.000000e+00 -; - %res = tail call fast double @exp2(double 1.0) - ret double %res -} - -declare double @llvm.exp2.f64(double) -define double @i_exp2() { -; CHECK-LABEL: @i_exp2( -; CHECK-NEXT: ret double 2.000000e+00 -; - %res = tail call fast double @llvm.exp2.f64(double 1.0) - ret double %res -} - -; FIXME: exp10() is not widely supported. -declare float @exp10f(float) -define float @f_exp10f() { -; CHECK-LABEL: @f_exp10f( -; CHECK-NEXT: [[RES:%.*]] = tail call float @exp10f(float 1.000000e+00) -; CHECK-NEXT: ret float [[RES]] -; - %res = tail call float @exp10f(float 1.0) - ret float %res -} - -declare double @log(double) -define double @f_log() { -; CHECK-LABEL: @f_log( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @log(double 1.0) - ret double %res -} - -declare double @llvm.log.f64(double) -define double @i_log() { -; CHECK-LABEL: @i_log( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @llvm.log.f64(double 1.0) - ret double %res -} - -declare float @log2f(float) -define float @f_log2f() { -; CHECK-LABEL: @f_log2f( -; CHECK-NEXT: ret float 0.000000e+00 -; - %res = tail call fast float @log2f(float 1.0) - ret float %res -} - -declare float @llvm.log2.f32(float) -define float @i_log2f() { -; CHECK-LABEL: @i_log2f( -; CHECK-NEXT: ret float 0.000000e+00 -; - %res = tail call fast float @llvm.log2.f32(float 1.0) - ret float %res -} - -declare double @log10(double) -define double @f_log10() { -; CHECK-LABEL: @f_log10( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @log10(double 1.0) - ret double %res -} - -declare float @sinf(float) -define float @f_sinf() { -; CHECK-LABEL: @f_sinf( -; 
CHECK-NEXT: ret float 0x3FEAED54{{.+}} -; - %res = tail call fast float @sinf(float 1.0) - ret float %res -} - -declare double @sinh(double) -define double @f_sinh() { -; CHECK-LABEL: @f_sinh( -; CHECK-NEXT: ret double 0x3FF2CD9F{{.+}} -; - %res = tail call fast double @sinh(double 1.0) - ret double %res -} - -declare float @sqrtf(float) -define float @f_sqrtf() { -; CHECK-LABEL: @f_sqrtf( -; CHECK-NEXT: ret float 1.000000e+00 -; - %res = tail call fast float @sqrtf(float 1.0) - ret float %res -} - -declare double @tan(double) -define double @f_tan() { -; CHECK-LABEL: @f_tan( -; CHECK-NEXT: ret double 0x3FF8EB24{{.+}} -; - %res = tail call fast double @tan(double 1.0) - ret double %res -} - -declare float @tanhf(float) -define float @f_tanhf() { -; CHECK-LABEL: @f_tanhf( -; CHECK-NEXT: [[RES:%.*]] = tail call fast float @tanhf(float 1.000000e+00) -; CHECK-NEXT: ret float 0x3FE85EFA{{.+}} -; - %res = tail call fast float @tanhf(float 1.0) - ret float %res -} +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -early-cse -earlycse-debug-hash -S -o - %s | FileCheck %s + +declare double @acos(double) +define double @f_acos() { +; CHECK-LABEL: @f_acos( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @acos(double 1.0) + ret double %res +} + +declare float @asinf(float) +define float @f_asinf() { +; CHECK-LABEL: @f_asinf( +; CHECK-NEXT: ret float 0x3FF921FB{{.+}} +; + %res = tail call fast float @asinf(float 1.0) + ret float %res +} + +declare double @atan(double) +define double @f_atan() { +; CHECK-LABEL: @f_atan( +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan(double 1.000000e+00) +; CHECK-NEXT: ret double 0x3FE921FB +; + %res = tail call fast double @atan(double 1.0) + ret double %res +} + +declare float @cosf(float) +define float @f_cosf() { +; CHECK-LABEL: @f_cosf( +; CHECK-NEXT: ret float 0x3FE14A2{{.+}} +; + %res = tail call fast float @cosf(float 1.0) + ret float %res +} + +declare float 
@llvm.cos.f32(float) +define float @i_cosf() { +; CHECK-LABEL: @i_cosf( +; CHECK-NEXT: ret float 0x3FE14A2 +; + %res = tail call fast float @llvm.cos.f32(float 1.0) + ret float %res +} + +declare double @cosh(double) +define double @f_cosh() { +; CHECK-LABEL: @f_cosh( +; CHECK-NEXT: ret double 0x3FF8B075{{.+}} +; + %res = tail call fast double @cosh(double 1.0) + ret double %res +} + +declare float @expf(float) +define float @f_expf() { +; CHECK-LABEL: @f_expf( +; CHECK-NEXT: ret float 0x4005BF0A{{.+}} +; + %res = tail call fast float @expf(float 1.0) + ret float %res +} + +declare float @llvm.exp.f32(float) +define float @i_expf() { +; CHECK-LABEL: @i_expf( +; CHECK-NEXT: ret float 0x4005BF0A{{.+}} +; + %res = tail call fast float @llvm.exp.f32(float 1.0) + ret float %res +} + +declare double @exp2(double) +define double @f_exp2() { +; CHECK-LABEL: @f_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @exp2(double 1.0) + ret double %res +} + +declare double @llvm.exp2.f64(double) +define double @i_exp2() { +; CHECK-LABEL: @i_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @llvm.exp2.f64(double 1.0) + ret double %res +} + +; FIXME: exp10() is not widely supported. 
+declare float @exp10f(float) +define float @f_exp10f() { +; CHECK-LABEL: @f_exp10f( +; CHECK-NEXT: [[RES:%.*]] = tail call float @exp10f(float 1.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = tail call float @exp10f(float 1.0) + ret float %res +} + +declare double @log(double) +define double @f_log() { +; CHECK-LABEL: @f_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log(double 1.0) + ret double %res +} + +declare double @llvm.log.f64(double) +define double @i_log() { +; CHECK-LABEL: @i_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @llvm.log.f64(double 1.0) + ret double %res +} + +declare float @log2f(float) +define float @f_log2f() { +; CHECK-LABEL: @f_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @log2f(float 1.0) + ret float %res +} + +declare float @llvm.log2.f32(float) +define float @i_log2f() { +; CHECK-LABEL: @i_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @llvm.log2.f32(float 1.0) + ret float %res +} + +declare double @log10(double) +define double @f_log10() { +; CHECK-LABEL: @f_log10( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log10(double 1.0) + ret double %res +} + +declare float @sinf(float) +define float @f_sinf() { +; CHECK-LABEL: @f_sinf( +; CHECK-NEXT: ret float 0x3FEAED54{{.+}} +; + %res = tail call fast float @sinf(float 1.0) + ret float %res +} + +declare double @sinh(double) +define double @f_sinh() { +; CHECK-LABEL: @f_sinh( +; CHECK-NEXT: ret double 0x3FF2CD9F{{.+}} +; + %res = tail call fast double @sinh(double 1.0) + ret double %res +} + +declare float @sqrtf(float) +define float @f_sqrtf() { +; CHECK-LABEL: @f_sqrtf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @sqrtf(float 1.0) + ret float %res +} + +declare double @tan(double) +define double @f_tan() { +; CHECK-LABEL: @f_tan( +; CHECK-NEXT: ret double 0x3FF8EB24{{.+}} +; + %res = tail call fast 
double @tan(double 1.0) + ret double %res +} + +declare float @tanhf(float) +define float @f_tanhf() { +; CHECK-LABEL: @f_tanhf( +; CHECK-NEXT: [[RES:%.*]] = tail call fast float @tanhf(float 1.000000e+00) +; CHECK-NEXT: ret float 0x3FE85EFA{{.+}} +; + %res = tail call fast float @tanhf(float 1.0) + ret float %res +} diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll b/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll index 2c6584bd597d9..b890fbfd1a2e5 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S -o - %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S -o - %s | FileCheck %s declare double @atan2(double, double) define double @f_atan2() { diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll b/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll index 1c88bab81a390..72a2abdbcf91f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll @@ -1,109 +1,109 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s - -declare float @nearbyintf(float) #0 -declare float @llvm.nearbyint.f32(float) #0 -declare double @nearbyint(double) #0 -declare double @llvm.nearbyint.f64(double) #0 -declare float @rintf(float) #0 -declare float @llvm.rint.f32(float) #0 -declare double @rint(double) #0 -declare double @llvm.rint.f64(double) #0 - -define float @constant_fold_rint_f32_01() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_01( -; CHECK-NEXT: ret float 1.000000e+00 -; - %x = call float @nearbyintf(float 1.25) #0 - ret float %x -} - -define float @constant_fold_rint_f32_02() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_02( -; CHECK-NEXT: ret float -1.000000e+00 -; - %x = call float @llvm.nearbyint.f32(float -1.25) #0 
- ret float %x -} - -define float @constant_fold_rint_f32_03() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_03( -; CHECK-NEXT: ret float 2.000000e+00 -; - %x = call float @rintf(float 1.5) #0 - ret float %x -} - -define float @constant_fold_rint_f32_04() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_04( -; CHECK-NEXT: ret float -2.000000e+00 -; - %x = call float @llvm.rint.f32(float -1.5) #0 - ret float %x -} - -define float @constant_fold_rint_f32_05() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_05( -; CHECK-NEXT: ret float 3.000000e+00 -; - %x = call float @nearbyintf(float 2.75) #0 - ret float %x -} - -define float @constant_fold_rint_f32_06() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_06( -; CHECK-NEXT: ret float -3.000000e+00 -; - %x = call float @llvm.nearbyint.f32(float -2.75) #0 - ret float %x -} - -define double @constant_fold_rint_f64_01() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_01( -; CHECK-NEXT: ret double 1.000000e+00 -; - %x = call double @rint(double 1.3) #0 - ret double %x -} - -define double @constant_fold_rint_f64_02() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_02( -; CHECK-NEXT: ret double -1.000000e+00 -; - %x = call double @llvm.rint.f64(double -1.3) #0 - ret double %x -} - -define double @constant_fold_rint_f64_03() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_03( -; CHECK-NEXT: ret double 2.000000e+00 -; - %x = call double @nearbyint(double 1.5) #0 - ret double %x -} - -define double @constant_fold_rint_f64_04() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_04( -; CHECK-NEXT: ret double -2.000000e+00 -; - %x = call double @llvm.nearbyint.f64(double -1.5) #0 - ret double %x -} - -define double @constant_fold_rint_f64_05() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_05( -; CHECK-NEXT: ret double 3.000000e+00 -; - %x = call double @rint(double 2.7) #0 - ret double %x -} - -define double @constant_fold_rint_f64_06() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_06( -; CHECK-NEXT: ret double -3.000000e+00 -; - %x = call double 
@llvm.rint.f64(double -2.7) #0 - ret double %x -} - -attributes #0 = { nounwind readnone } +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s + +declare float @nearbyintf(float) #0 +declare float @llvm.nearbyint.f32(float) #0 +declare double @nearbyint(double) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare float @rintf(float) #0 +declare float @llvm.rint.f32(float) #0 +declare double @rint(double) #0 +declare double @llvm.rint.f64(double) #0 + +define float @constant_fold_rint_f32_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @nearbyintf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_02() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_03( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @rintf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_04( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.rint.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_05( +; CHECK-NEXT: ret float 3.000000e+00 +; + %x = call float @nearbyintf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_rint_f32_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_06( +; CHECK-NEXT: ret float -3.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_rint_f64_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @rint(double 1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_02() #0 
{ +; CHECK-LABEL: @constant_fold_rint_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.rint.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_03( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @nearbyint(double 1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_04( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double @llvm.nearbyint.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_05( +; CHECK-NEXT: ret double 3.000000e+00 +; + %x = call double @rint(double 2.7) #0 + ret double %x +} + +define double @constant_fold_rint_f64_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_06( +; CHECK-NEXT: ret double -3.000000e+00 +; + %x = call double @llvm.rint.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/round.ll b/llvm/test/Transforms/InstSimplify/ConstProp/round.ll index a636160dd12a6..6d9a89a690216 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/round.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/round.ll @@ -1,92 +1,92 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s - -declare float @roundf(float) #0 -declare float @llvm.round.f32(float) #0 -declare double @round(double) #0 -declare double @llvm.round.f64(double) #0 - -; CHECK-LABEL: @constant_fold_round_f32_01 -; CHECK-NEXT: ret float 1.000000e+00 -define float @constant_fold_round_f32_01() #0 { - %x = call float @roundf(float 1.25) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_02 -; CHECK-NEXT: ret float -1.000000e+00 -define float @constant_fold_round_f32_02() #0 { - %x = call float @llvm.round.f32(float -1.25) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_03 -; CHECK-NEXT: ret float 2.000000e+00 -define 
float @constant_fold_round_f32_03() #0 { - %x = call float @roundf(float 1.5) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_04 -; CHECK-NEXT: ret float -2.000000e+00 -define float @constant_fold_round_f32_04() #0 { - %x = call float @llvm.round.f32(float -1.5) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_05 -; CHECK-NEXT: ret float 3.000000e+00 -define float @constant_fold_round_f32_05() #0 { - %x = call float @roundf(float 2.75) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_06 -; CHECK-NEXT: ret float -3.000000e+00 -define float @constant_fold_round_f32_06() #0 { - %x = call float @llvm.round.f32(float -2.75) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f64_01 -; CHECK-NEXT: ret double 1.000000e+00 -define double @constant_fold_round_f64_01() #0 { - %x = call double @round(double 1.3) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_02 -; CHECK-NEXT: ret double -1.000000e+00 -define double @constant_fold_round_f64_02() #0 { - %x = call double @llvm.round.f64(double -1.3) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_03 -; CHECK-NEXT: ret double 2.000000e+00 -define double @constant_fold_round_f64_03() #0 { - %x = call double @round(double 1.5) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_04 -; CHECK-NEXT: ret double -2.000000e+00 -define double @constant_fold_round_f64_04() #0 { - %x = call double @llvm.round.f64(double -1.5) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_05 -; CHECK-NEXT: ret double 3.000000e+00 -define double @constant_fold_round_f64_05() #0 { - %x = call double @round(double 2.7) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_06 -; CHECK-NEXT: ret double -3.000000e+00 -define double @constant_fold_round_f64_06() #0 { - %x = call double @llvm.round.f64(double -2.7) #0 - ret double %x -} - -attributes #0 = { nounwind readnone } +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | 
FileCheck %s + +declare float @roundf(float) #0 +declare float @llvm.round.f32(float) #0 +declare double @round(double) #0 +declare double @llvm.round.f64(double) #0 + +; CHECK-LABEL: @constant_fold_round_f32_01 +; CHECK-NEXT: ret float 1.000000e+00 +define float @constant_fold_round_f32_01() #0 { + %x = call float @roundf(float 1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_02 +; CHECK-NEXT: ret float -1.000000e+00 +define float @constant_fold_round_f32_02() #0 { + %x = call float @llvm.round.f32(float -1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_03 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_round_f32_03() #0 { + %x = call float @roundf(float 1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_04 +; CHECK-NEXT: ret float -2.000000e+00 +define float @constant_fold_round_f32_04() #0 { + %x = call float @llvm.round.f32(float -1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_05 +; CHECK-NEXT: ret float 3.000000e+00 +define float @constant_fold_round_f32_05() #0 { + %x = call float @roundf(float 2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_06 +; CHECK-NEXT: ret float -3.000000e+00 +define float @constant_fold_round_f32_06() #0 { + %x = call float @llvm.round.f32(float -2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f64_01 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_round_f64_01() #0 { + %x = call double @round(double 1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_02 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_round_f64_02() #0 { + %x = call double @llvm.round.f64(double -1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_03 +; CHECK-NEXT: ret double 2.000000e+00 +define double @constant_fold_round_f64_03() #0 { + %x = call double @round(double 1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_04 +; 
CHECK-NEXT: ret double -2.000000e+00 +define double @constant_fold_round_f64_04() #0 { + %x = call double @llvm.round.f64(double -1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_05 +; CHECK-NEXT: ret double 3.000000e+00 +define double @constant_fold_round_f64_05() #0 { + %x = call double @round(double 2.7) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_06 +; CHECK-NEXT: ret double -3.000000e+00 +define double @constant_fold_round_f64_06() #0 { + %x = call double @llvm.round.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll b/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll index 68d20002fe4ab..3548e95620be2 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll @@ -1,105 +1,105 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s - -declare float @truncf(float) #0 -declare float @llvm.trunc.f32(float) #0 -declare double @trunc(double) #0 -declare double @llvm.trunc.f64(double) #0 - -define float @constant_fold_trunc_f32_01() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_01( -; CHECK-NEXT: ret float 1.000000e+00 -; - %x = call float @truncf(float 1.25) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_02() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_02( -; CHECK-NEXT: ret float -1.000000e+00 -; - %x = call float @llvm.trunc.f32(float -1.25) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_03() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_03( -; CHECK-NEXT: ret float 1.000000e+00 -; - %x = call float @truncf(float 1.5) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_04() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_04( -; CHECK-NEXT: ret float -1.000000e+00 -; - %x = call float @llvm.trunc.f32(float -1.5) #0 - ret float %x -} - -define float 
@constant_fold_trunc_f32_05() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_05( -; CHECK-NEXT: ret float 2.000000e+00 -; - %x = call float @truncf(float 2.75) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_06() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_06( -; CHECK-NEXT: ret float -2.000000e+00 -; - %x = call float @llvm.trunc.f32(float -2.75) #0 - ret float %x -} - -define double @constant_fold_trunc_f64_01() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_01( -; CHECK-NEXT: ret double 1.000000e+00 -; - %x = call double @trunc(double 1.3) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_02() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_02( -; CHECK-NEXT: ret double -1.000000e+00 -; - %x = call double @llvm.trunc.f64(double -1.3) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_03() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_03( -; CHECK-NEXT: ret double 1.000000e+00 -; - %x = call double @trunc(double 1.5) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_04() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_04( -; CHECK-NEXT: ret double -1.000000e+00 -; - %x = call double @llvm.trunc.f64(double -1.5) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_05() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_05( -; CHECK-NEXT: ret double 2.000000e+00 -; - %x = call double @trunc(double 2.7) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_06() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_06( -; CHECK-NEXT: ret double -2.000000e+00 -; - %x = call double @llvm.trunc.f64(double -2.7) #0 - ret double %x -} - -attributes #0 = { nounwind readnone } +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s + +declare float @truncf(float) #0 +declare float @llvm.trunc.f32(float) #0 +declare double @trunc(double) #0 +declare double @llvm.trunc.f64(double) #0 + +define float @constant_fold_trunc_f32_01() #0 { 
+; CHECK-LABEL: @constant_fold_trunc_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.25) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_03( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_04( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_05( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @truncf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_06( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.trunc.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_trunc_f64_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_03( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_04( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.5) #0 + ret double %x +} + +define double 
@constant_fold_trunc_f64_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_05( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @trunc(double 2.7) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_06( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double @llvm.trunc.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll index 6ac6564a08a97..5d3d4a44c02c9 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll @@ -1,32 +1,32 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64" - -; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() { -; CHECK-NEXT: ret <4 x i32*> undef -define <4 x i32*> @fixed_length_version_first() { - %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef - ret <4 x i32*> %ptr -} - -; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() { -; CHECK-NEXT: ret <4 x <4 x i32>*> undef -define <4 x <4 x i32>*> @fixed_length_version_second() { - %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef - ret <4 x <4 x i32>*> %ptr -} - -; CHECK-LABEL: define @vscale_version_first() { -; CHECK-NEXT: ret undef -define @vscale_version_first() { - %ptr = getelementptr i32, undef, undef - ret %ptr -} - -; CHECK-LABEL: define *> @vscale_version_second() { -; CHECK-NEXT: ret *> undef -define *> @vscale_version_second() { - %ptr = getelementptr , * undef, undef - ret *> %ptr -} +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; CHECK-LABEL: define <4 
x i32*> @fixed_length_version_first() { +; CHECK-NEXT: ret <4 x i32*> undef +define <4 x i32*> @fixed_length_version_first() { + %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef + ret <4 x i32*> %ptr +} + +; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() { +; CHECK-NEXT: ret <4 x <4 x i32>*> undef +define <4 x <4 x i32>*> @fixed_length_version_second() { + %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef + ret <4 x <4 x i32>*> %ptr +} + +; CHECK-LABEL: define @vscale_version_first() { +; CHECK-NEXT: ret undef +define @vscale_version_first() { + %ptr = getelementptr i32, undef, undef + ret %ptr +} + +; CHECK-LABEL: define *> @vscale_version_second() { +; CHECK-NEXT: ret *> undef +define *> @vscale_version_second() { + %ptr = getelementptr , * undef, undef + ret *> %ptr +} diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll index 9c1f6730122e2..df9011636a35f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index aa299c1084b7e..70b50da9f0415 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -201,3 +201,53 @@ define i1 @abs_ule_int_min(i8 %x) { %c = icmp ule i8 %abs, 128 ret i1 %c } + +define i32 @select_abs_of_abs_eq(i32 %x) { +; CHECK-LABEL: @select_abs_of_abs_eq( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %abs = call i32 
@llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp eq i32 %x, 0 + %sel = select i1 %cmp, i32 %neg, i32 %abs + ret i32 %sel +} + +define i32 @select_abs_of_abs_ne(i32 %x) { +; CHECK-LABEL: @select_abs_of_abs_ne( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp ne i32 %x, 0 + %sel = select i1 %cmp, i32 %abs, i32 %neg + ret i32 %sel +} + +define i32 @select_nabs_of_abs_eq(i32 %x) { +; CHECK-LABEL: @select_nabs_of_abs_eq( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] +; CHECK-NEXT: ret i32 [[NEG]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp eq i32 %x, 0 + %sel = select i1 %cmp, i32 %abs, i32 %neg + ret i32 %sel +} + +define i32 @select_nabs_of_abs_ne(i32 %x) { +; CHECK-LABEL: @select_nabs_of_abs_ne( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] +; CHECK-NEXT: ret i32 [[NEG]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp ne i32 %x, 0 + %sel = select i1 %cmp, i32 %neg, i32 %abs + ret i32 %sel +} diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index c2c0f9ebf82dd..d646334887b23 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -2133,3 +2133,150 @@ define i8 @umax_lshr(i8 %x, i8 %y) { %max = call i8 @llvm.umax.i8(i8 %x, i8 %shr) ret i8 %max } + +define i8 @umax_dom_cond_uge(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_uge( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[X]] +; 
CHECK: false: +; CHECK-NEXT: ret i8 [[Y]] +; + %cmp = icmp uge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umax_dom_cond_ugt(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_ugt( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[X]] +; CHECK: false: +; CHECK-NEXT: ret i8 [[Y]] +; + %cmp = icmp ugt i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umax_dom_cond_ule(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_ule( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[Y]] +; CHECK: false: +; CHECK-NEXT: ret i8 [[X]] +; + %cmp = icmp ule i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umax_dom_cond_ult(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_ult( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[Y]] +; CHECK: false: +; CHECK-NEXT: ret i8 [[X]] +; + %cmp = icmp ult i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umin_dom_cond_uge(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_dom_cond_uge( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label 
[[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[Y]] +; CHECK: false: +; CHECK-NEXT: ret i8 [[X]] +; + %cmp = icmp uge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umin.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @smax_dom_cond_sge(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_dom_cond_sge( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[X]] +; CHECK: false: +; CHECK-NEXT: ret i8 [[Y]] +; + %cmp = icmp sge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.smax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.smax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @smin_dom_cond_sge(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_dom_cond_sge( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: ret i8 [[Y]] +; CHECK: false: +; CHECK-NEXT: ret i8 [[X]] +; + %cmp = icmp sge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.smin.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.smin.i8(i8 %x, i8 %y) + ret i8 %m2 +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll index d6e3469c7bdb7..dd6692d75e5f5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll @@ -243,3 +243,60 @@ for.end: !71 = distinct !{!71, !72, !73} !72 = !{!"llvm.loop.vectorize.width", i32 4} !73 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @__log2f_finite(float) #0 + +; CHECK-LABEL: @log2_f32 +; CHECK: <4 x float> @__svml_log2f4 +; CHECK: ret +define void @log2_f32(float* nocapture %varray) { +entry: + br label %for.body 
+ +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__log2f_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: ; preds = %for.body + ret void +} + +!81 = distinct !{!21, !22, !23} +!82 = !{!"llvm.loop.vectorize.width", i32 4} +!83 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__log2_finite(double) #0 + +; CHECK-LABEL: @log2_f64 +; CHECK: <4 x double> @__svml_log24 +; CHECK: ret +define void @log2_f64(double* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log2_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: ; preds = %for.body + ret void +} + +!91 = distinct !{!31, !32, !33} +!92 = !{!"llvm.loop.vectorize.width", i32 4} +!93 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll index aa8a25c3b87f5..c074830075521 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll @@ -28,6 +28,11 @@ declare float @logf(float) #0 declare double 
@llvm.log.f64(double) #0 declare float @llvm.log.f32(float) #0 +declare double @log2(double) #0 +declare float @log2f(float) #0 +declare double @llvm.log2.f64(double) #0 +declare float @llvm.log2.f32(float) #0 + declare double @exp2(double) #0 declare float @exp2f(float) #0 declare double @llvm.exp2.f64(double) #0 @@ -501,6 +506,98 @@ for.end: ret void } +define void @log2_f64(double* nocapture %varray) { +; CHECK-LABEL: @log2_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f32(float* nocapture %varray) { +; CHECK-LABEL: @log2_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log2f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @log2_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = 
trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log2.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @log2_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log2.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @exp2_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp2_f64( ; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) diff --git a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll new file mode 100644 index 0000000000000..f3c54579d9d0e --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll @@ -0,0 +1,166 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -aa-pipeline=basic-aa -passes='require,memcpyopt' -verify-memoryssa -S %s | FileCheck %s + +; REQUIRES: asserts + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.15.0" + +%t = type <{ i8*, [4 x i8], i8*, i8*, i32, [8192 x i8] }> + + +define i32 @test1(%t* %ptr) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: invoke.cont6: +; 
CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[T:%.*]], %t* [[PTR:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[P_1_C:%.*]] = bitcast i8** [[P_1]] to i8* +; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds [[T]], %t* [[PTR]], i64 0, i32 4 +; CHECK-NEXT: [[P_3:%.*]] = getelementptr inbounds [[T]], %t* [[PTR]], i64 0, i32 5, i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8** [[P_1]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 20, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P_2]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 8195, i1 false) +; CHECK-NEXT: ret i32 0 +; +invoke.cont6: + %p.1 = getelementptr inbounds %t, %t* %ptr, i64 0, i32 0 + %p.1.c = bitcast i8** %p.1 to i8* + call void @llvm.memset.p0i8.i64(i8* %p.1.c, i8 0, i64 20, i1 false) + store i8* null, i8** %p.1, align 8 + %p.2 = getelementptr inbounds %t, %t* %ptr, i64 0, i32 4 + store i32 0, i32* %p.2, align 8 + %p.3 = getelementptr inbounds %t, %t* %ptr, i64 0, i32 5, i64 0 + call void @llvm.memset.p0i8.i64(i8* %p.3, i8 0, i64 8191, i1 false) + ret i32 0 +} + +declare i8* @get_ptr() + +define void @test2(i8 *%in) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL_I1_I:%.*]] = tail call i8* @get_ptr() +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[CALL_I1_I]], i64 10 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP0]], i8 0, i64 0, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL_I1_I]], i8* [[IN:%.*]], i64 10, i1 false) +; CHECK-NEXT: ret void +; +entry: + %call.i1.i = tail call i8* @get_ptr() + tail call void @llvm.memset.p0i8.i64(i8* %call.i1.i, i8 0, i64 10, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call.i1.i, i8* %in, i64 10, i1 false) + ret void +} + +declare i8* @malloc(i64) + +define i32 @test3(i8* noalias %in) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call i8* @malloc(i64 20) +; CHECK-NEXT: tail call 
void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL_I_I_I]], i8* [[IN:%.*]], i64 20, i1 false) +; CHECK-NEXT: ret i32 10 +; + %call.i.i.i = tail call i8* @malloc(i64 20) + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call.i.i.i, i8* %in, i64 20, i1 false) + ret i32 10 +} + +define void @test4(i32 %n, i8* noalias %ptr.0, i8* noalias %ptr.1, i32* %ptr.2) unnamed_addr { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[ELEM_I:%.*]] = getelementptr i8, i8* [[PTR_0:%.*]], i64 8 +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[PTR_2:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[ELEM_I]], i64 10 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP1]], i8 0, i64 0, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ELEM_I]], i8* [[PTR_1:%.*]], i64 10, i1 false) +; CHECK-NEXT: ret void +; + %elem.i = getelementptr i8, i8* %ptr.0, i64 8 + call void @llvm.memset.p0i8.i64(i8* %elem.i, i8 0, i64 10, i1 false) + store i32 %n, i32* %ptr.2, align 8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %elem.i, i8* %ptr.1, i64 10, i1 false) + ret void +} + +declare void @decompose(%t* nocapture) + +define void @test5(i32* %ptr) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[EARLY_DATA:%.*]] = alloca [128 x i8], align 8 +; CHECK-NEXT: [[TMP:%.*]] = alloca [[T:%.*]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [128 x i8]* [[EARLY_DATA]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %t* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 32, i8* [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[PTR:%.*]], align 8 +; CHECK-NEXT: call fastcc void @decompose(%t* [[TMP]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i1 false) +; CHECK-NEXT: ret void +; +entry: + %early_data = alloca [128 x i8], align 8 + %tmp = alloca %t, align 8 + %0 = bitcast [128 x i8]* %early_data to i8* + %1 = bitcast %t* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* %0) + %2 = load i32, i32* 
%ptr, align 8 + call fastcc void @decompose(%t* %tmp) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i1 false) + ret void +} + +define i8 @test6(i8* %ptr, i8* noalias %ptr.1) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* [[PTR:%.*]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[PTR]], align 8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[PTR]], i8* [[PTR_1:%.*]], i64 24, i1 false) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + call void @llvm.lifetime.start.p0i8(i64 24, i8* %ptr) + %0 = load i8, i8* %ptr, align 8 + call void @llvm.memmove.p0i8.p0i8.i64(i8* %ptr, i8* %ptr.1, i64 24, i1 false) + ret i8 %0 +} + +define void @test7([4 x i32]* %ptr) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR]], i64 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR]], i64 0, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR]], i64 0, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: ret void +; +entry: + %0 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 0 + store i32 0, i32* %0, align 1 + %1 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 1 + store i32 0, i32* %1, align 1 + %2 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 2 + store i32 0, i32* %2, align 1 + %3 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 3 + store i32 0, i32* %3, align 1 + call void @clobber() + ret void +} + +declare void @clobber() + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* 
nocapture) #0 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #0 + +attributes #0 = { argmemonly nounwind willreturn } +attributes #1 = { argmemonly nounwind willreturn writeonly } diff --git a/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll index a08e07e9644a4..9a137fb3f3e92 100644 --- a/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll +++ b/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -nary-reassociate -early-cse -S | FileCheck %s +; RUN: opt < %s -nary-reassociate -early-cse -earlycse-debug-hash -S | FileCheck %s ; RUN: opt < %s -passes='nary-reassociate' -S | opt -early-cse -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" diff --git a/llvm/test/Transforms/NewGVN/commute.ll b/llvm/test/Transforms/NewGVN/commute.ll index d7737dd04f97a..f4b53621e4206 100644 --- a/llvm/test/Transforms/NewGVN/commute.ll +++ b/llvm/test/Transforms/NewGVN/commute.ll @@ -29,6 +29,8 @@ define void @cmp(i32 %x, i32 %y) { ret void } +declare i32 @llvm.smax.i32(i32, i32) + define void @intrinsic(i32 %x, i32 %y) { ; CHECK-LABEL: @intrinsic( ; CHECK-NEXT: [[M1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) @@ -42,4 +44,46 @@ define void @intrinsic(i32 %x, i32 %y) { ret void } -declare i32 @llvm.smax.i32(i32, i32) +declare i16 @llvm.smul.fix.i16(i16, i16, i32) +declare i16 @llvm.umul.fix.i16(i16, i16, i32) + +define i16 @intrinsic_3_args(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args( +; CHECK-NEXT: [[M1:%.*]] = call i16 
@llvm.smul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 1) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.smul.fix.i16(i16 %x, i16 %y, i32 1) + %m2 = call i16 @llvm.smul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +define i16 @intrinsic_3_args_not_same(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args_not_same( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 2) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.umul.fix.i16(i16 %x, i16 %y, i32 2) + %m2 = call i16 @llvm.umul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fma(float %x, float %y) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[M1:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[M2:%.*]] = call float @llvm.fma.f32(float [[Y]], float [[X]], float 1.000000e+00) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[M1]], [[M2]] +; CHECK-NEXT: ret float [[R]] +; + %m1 = call float @llvm.fma.f32(float %x, float %y, float 1.0) + %m2 = call float @llvm.fma.f32(float %y, float %x, float 1.0) + %r = fdiv nnan float %m1, %m2 + ret float %r +} diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll index 19b55cc661b00..560ad2fbcd3de 100644 --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -30,6 +30,21 @@ define i32 @bad_use(i32 %0) { ret i32 %2 } +define void @indirect_call(void ()* %0) { +; CHECK-LABEL: define {{[^@]+}}@indirect_call +; CHECK-SAME: (void ()* [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: 
tail call void [[TMP0]]() +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @omp_get_max_threads() +; CHECK-NEXT: tail call void @use(i32 [[TMP2]]) +; CHECK-NEXT: ret void + call void @omp_set_num_threads(i32 4) + tail call void %0() + %2 = tail call i32 @omp_get_max_threads() + tail call void @use(i32 %2) + ret void +} + define dso_local i32 @foo(i32 %0, i32 %1) { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) diff --git a/llvm/test/Transforms/Reassociate/cse-pairs.ll b/llvm/test/Transforms/Reassociate/cse-pairs.ll index 33397ea050c41..a920f49b59d78 100644 --- a/llvm/test/Transforms/Reassociate/cse-pairs.ll +++ b/llvm/test/Transforms/Reassociate/cse-pairs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -reassociate -early-cse -S < %s | FileCheck %s +; RUN: opt -reassociate -early-cse -earlycse-debug-hash -S < %s | FileCheck %s @num1 = local_unnamed_addr global i32 0, align 4 @num2 = local_unnamed_addr global i32 0, align 4 diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll new file mode 100644 index 0000000000000..d06b94162b5be --- /dev/null +++ b/llvm/test/Transforms/SCCP/intrinsics.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -ipsccp -S %s | FileCheck %s + +declare i8 @llvm.abs.i8(i8, i1) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1) +declare i8 @llvm.umax.i8(i8, i8) + +declare void @use(i1) +declare void @use_vec(<2 x i1>) + +define void @abs1(i8* %p) { +; CHECK-LABEL: @abs1( +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[P:%.*]], align 1, [[RNG0:!range !.*]] +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X]], i1 false) +; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i8 [[ABS]], 0 +; CHECK-NEXT: call void @use(i1 [[CMP1]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[ABS]], 10 +; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp sge i8 [[ABS]], 1 
+; CHECK-NEXT: call void @use(i1 [[CMP3]]) +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i8 [[ABS]], 9 +; CHECK-NEXT: call void @use(i1 [[CMP4]]) +; CHECK-NEXT: ret void +; + %x = load i8, i8* %p, !range !{i8 -9, i8 10} + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp1 = icmp sge i8 %abs, 0 + call void @use(i1 %cmp1) + %cmp2 = icmp slt i8 %abs, 10 + call void @use(i1 %cmp2) + %cmp3 = icmp sge i8 %abs, 1 + call void @use(i1 %cmp3) + %cmp4 = icmp slt i8 %abs, 9 + call void @use(i1 %cmp4) + ret void +} + +; Even if we don't know anything about the input range of the operand, +; we still know something about the result range of abs(). +define void @abs2(i8 %x) { +; CHECK-LABEL: @abs2( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 true) +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[ABS]], 0 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp sge i8 %abs, 0 + call void @use(i1 %cmp) + ret void +} + +define void @abs2_vec(<2 x i8> %x) { +; CHECK-LABEL: @abs2_vec( +; CHECK-NEXT: [[ABS:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[X:%.*]], i1 true) +; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i8> [[ABS]], zeroinitializer +; CHECK-NEXT: call void @use_vec(<2 x i1> [[CMP]]) +; CHECK-NEXT: ret void +; + %abs = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %x, i1 true) + %cmp = icmp sge <2 x i8> %abs, zeroinitializer + call void @use_vec(<2 x i1> %cmp) + ret void +} + +define void @umax1(i8* %p1, i8* %p2) { +; CHECK-LABEL: @umax1( +; CHECK-NEXT: [[X1:%.*]] = load i8, i8* [[P1:%.*]], align 1, [[RNG1:!range !.*]] +; CHECK-NEXT: [[X2:%.*]] = load i8, i8* [[P2:%.*]], align 1, [[RNG2:!range !.*]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X1]], i8 [[X2]]) +; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i8 [[M]], 5 +; CHECK-NEXT: call void @use(i1 [[CMP1]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[M]], 15 +; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp uge i8 [[M]], 
6 +; CHECK-NEXT: call void @use(i1 [[CMP3]]) +; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i8 [[M]], 14 +; CHECK-NEXT: call void @use(i1 [[CMP4]]) +; CHECK-NEXT: ret void +; + %x1 = load i8, i8* %p1, !range !{i8 0, i8 10} + %x2 = load i8, i8* %p2, !range !{i8 5, i8 15} + %m = call i8 @llvm.umax.i8(i8 %x1, i8 %x2) + %cmp1 = icmp uge i8 %m, 5 + call void @use(i1 %cmp1) + %cmp2 = icmp ult i8 %m, 15 + call void @use(i1 %cmp2) + %cmp3 = icmp uge i8 %m, 6 + call void @use(i1 %cmp3) + %cmp4 = icmp ult i8 %m, 14 + call void @use(i1 %cmp4) + ret void +} + +define void @umax2(i8 %x) { +; CHECK-LABEL: @umax2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 10) +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 10 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 10) + %cmp = icmp uge i8 %m, 10 + call void @use(i1 %cmp) + ret void +} diff --git a/llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll b/llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll new file mode 100644 index 0000000000000..d8b5fbff4e628 --- /dev/null +++ b/llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll @@ -0,0 +1,62 @@ +; if IPSCCP determines a function returns undef, +; then the "returned" attribute of input arguments +; should be cleared. + +; RUN: opt < %s -ipsccp -S | FileCheck %s +define i32 @main() { +; CHECK-LABEL: @main +entry: +; CHECK-NEXT: entry: + %call = call i32 @func_return_undef(i32 returned 1) +; CHECK: call i32 @func_return_undef(i32 1) +; CHECK-NOT: returned + ret i32 %call +; CHECK: ret i32 1 +} + +define internal i32 @func_return_undef(i32 returned %arg) { +; CHECK: {{define.*@func_return_undef}} +; CHECK-NOT: returned +entry: +; CHECK-NEXT: entry: +; CHECK-NEXT: {{ret.*undef}} + ret i32 %arg +} + + +; The only case that users of zapped functions are non-call site +; users is that they are blockaddr users. 
Skip them because we +; want to remove the returned attribute for call sites + +; CHECK: {{define.*@blockaddr_user}} +; CHECK-NOT: returned +define internal i32 @blockaddr_user(i1 %c, i32 returned %d) { +entry: + br i1 %c, label %bb1, label %bb2 + +bb1: + br label %branch.block + +bb2: + br label %branch.block + +branch.block: + %addr = phi i8* [blockaddress(@blockaddr_user, %target1), %bb1], [blockaddress(@blockaddr_user, %target2), %bb2] + indirectbr i8* %addr, [label %target1, label %target2] + +target1: + br label %target2 + +; CHECK: ret i32 undef +target2: + ret i32 %d +} + +define i32 @call_blockaddr_user(i1 %c) { +; CHECK-LABEL: define i32 @call_blockaddr_user( +; CHECK-NEXT: %r = call i32 @blockaddr_user(i1 %c +; CHECK-NOT: returned +; CHECK-NEXT: ret i32 10 + %r = call i32 @blockaddr_user(i1 %c, i32 returned 10) + ret i32 %r +} diff --git a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll new file mode 100644 index 0000000000000..5857ce2d30b72 --- /dev/null +++ b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -ipsccp -S %s | FileCheck %s + +@y = common global [1 x i32] zeroinitializer, align 4 +@x = common global [1 x i32] zeroinitializer, align 4 + +define i32 @eq_undereferenceable(i32* %p) { +; CHECK-LABEL: @eq_undereferenceable( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x 
i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + %cmp = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + ret i32 %0 +} + + +define i32 @eq_dereferenceable(i32* %p) { +; CHECK-LABEL: @eq_dereferenceable( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0) +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + %cmp = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 0) + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + ret i32 %0 +} + 
+define i1 @eq_undereferenceable_cmp_simp(i32* %p) { +; CHECK-LABEL: @eq_undereferenceable_cmp_simp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: br i1 [[CMP_0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: ret i1 true +; CHECK: if.end: +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32* [[P]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: ret i1 [[CMP_2]] +; +entry: + %cmp.0 = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + br i1 %cmp.0, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %p, align 4 + %cmp.1 = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + ret i1 %cmp.1 + +if.end: ; preds = %if.then, %entry + %cmp.2 = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + ret i1 %cmp.2 +} diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll index 2bcbaff50a973..ffb13ca583f7f 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll @@ -169,19 +169,34 @@ cond.end: } define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 { -; CHECK-LABEL: @or_predicate_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = 
icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @or_predicate_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 +; THUMB-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @or_predicate_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: %cmp = icmp sgt i32 %d, 3 @@ -202,19 +217,34 @@ cond.end: } define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 { -; CHECK-LABEL: @or_invert_predicate_minsize( -; 
CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @or_invert_predicate_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 +; THUMB-NEXT: br i1 [[CMP]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @or_invert_predicate_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3 +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: %cmp = icmp sgt i32 %d, 3 @@ 
-267,19 +297,33 @@ cond.end: } define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 { -; CHECK-LABEL: @or_xor_predicate_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @or_xor_predicate_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: br i1 [[CMP:%.*]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @or_xor_predicate_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: br i1 %cmp, label %lor.lhs.false, label %cond.end @@ -331,19 +375,33 @@ cond.end: } define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 { -; CHECK-LABEL: @and_xor_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @and_xor_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: br i1 [[CMP:%.*]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_FALSE:%.*]], label [[COND_END]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @and_xor_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]] +; ARM: cond.false: +; 
ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: br i1 %cmp, label %cond.end, label %lor.lhs.false diff --git a/llvm/test/Transforms/SimplifyCFG/merge-default.ll b/llvm/test/Transforms/SimplifyCFG/merge-default.ll new file mode 100644 index 0000000000000..93b64d708807a --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-default.ll @@ -0,0 +1,45 @@ +; RUN: opt -simplifycfg -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g() +declare void @f() + +define void @foo(i32 %Kind) { +; CHECK-LABEL: @foo( +; CHECK-NEXT:entry: +; CHECK-NEXT: switch i32 %Kind, label %sw.epilog [ +; CHECK-NEXT: i32 15, label %sw.bb2 +; CHECK-NEXT: i32 2, label %sw.bb +; CHECK-NEXT: ] +; CHECK: sw.bb: +; CHECK-NEXT: call void @g() +; CHECK-NEXT: call void @g() +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.bb2: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.epilog: +; CHECK-NEXT: ret void +; CHECK-NEXT:} + +entry: + switch i32 %Kind, label %sw.epilog [ + i32 1, label %sw.epilog + i32 2, label %sw.bb + i32 15, label %sw.bb2 + ] + +sw.bb: + call void @g() + call void @g() + br label %sw.epilog + +sw.bb2: + call void @f() + br label %sw.epilog + +sw.epilog: + ret void +} diff --git a/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg b/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg new file mode 100644 index 0000000000000..7184443994b69 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll 
b/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll new file mode 100644 index 0000000000000..e474e1d2d75e9 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -vector-combine -S -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; This test checks we are not crashing with TTI when trying to get shuffle cost. +; This test also check that shuffle mask zeroinitializer is +; not narrowed into <0, 1, 0, 1, ...>, which we cannot reason if it's a valid +; splat or not. + +define @bitcast_shuffle( %a) { +; CHECK-LABEL: @bitcast_shuffle( +; CHECK-NEXT: [[I:%.*]] = shufflevector [[A:%.*]], undef, zeroinitializer +; CHECK-NEXT: [[R:%.*]] = bitcast [[I]] to +; CHECK-NEXT: ret [[R]] +; + %i = shufflevector %a, undef, zeroinitializer + %r = bitcast %i to + ret %r +} diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index e24ffb8da66f2..f0c5b6ef7ad81 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -346,12 +346,11 @@ define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(1 ret <4 x float> %r } -; TODO: Should load v4i32. 
- define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v8i32( -; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -359,13 +358,10 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ret <8 x i32> %r } -; TODO: Should load v4i32. - define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( -; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i32>* [[P:%.*]] to i32* -; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %b = bitcast <4 x i32>* %p to i32* @@ -374,12 +370,11 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl ret <8 x i32> %r } -; TODO: Should load v4f32. 
- define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v16f32( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <16 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -387,12 +382,11 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) ret <16 x float> %r } -; TODO: Should load v4f32. - define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v2f32( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %s = load float, float* %p, align 4 diff --git a/llvm/test/Verifier/weak-dllimport.ll b/llvm/test/Verifier/weak-dllimport.ll new file mode 100644 index 0000000000000..c7b6ed977b100 --- /dev/null +++ b/llvm/test/Verifier/weak-dllimport.ll @@ -0,0 +1,28 @@ +; RUN: opt -verify < %s 2>&1 | FileCheck %s +; CHECK-NOT: Global is marked as dllimport, but not external + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.11.0" + +@"?var_hook@@3HA" = extern_weak dllimport global i32, align 4 + +; Function Attrs: noinline optnone uwtable +define dso_local zeroext i1 @"?foo@@YA_NPEAHH@Z"(i32* %0, i32 %1) #0 { + ret i1 0 +} + 
+declare extern_weak dllimport void @func_hook(i32) #1 + +attributes #0 = { noinline optnone uwtable } +attributes #1 = { uwtable } + +; Compiled from the following C++ example with --target=x86_64-pc-win32, +; using the non-checking configuration +;__declspec(dllimport) __attribute__((weak)) extern "C" void func_hook(int); +;extern __declspec(dllimport) __attribute__((weak)) int var_hook; +;bool foo(int *q, int p) +;{ +; if (func_hook) +; func_hook(p); +; return &var_hook == q; +;} diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml index 0775de8907ead..b0970cdac8b24 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml @@ -101,7 +101,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101030E1305110155170000022E00030E110112060000032E00030E11011201000000 - sectname: __debug_info segname: __DWARF addr: 0x0000000000000024 @@ -114,7 +113,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 630000000400000000000801010000000200000000000000000000000000020D0000000000000000000000200000000317000000000000000000000030000000000000000221000000001000000000000000100000022600000000200000000000000010000000 - sectname: __debug_ranges segname: __DWARF addr: 0x000000000000008B @@ -127,7 +125,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: '0020000000000000003000000000000000000000000000002000000000000000000000000000000030000000000000000010000000000000002000000000000000000000000000000000000000000000' - sectname: __debug_str segname: __DWARF addr: 0x00000000000000DB @@ -140,7 +137,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
002F746D702F6D61696E2E630073747269707065643100737472697070656432006D61696E00666F6F00 - cmd: LC_SYMTAB cmdsize: 24 symoff: 0 diff --git a/llvm/test/tools/llvm-dwarfdump/debug-str.yaml b/llvm/test/tools/llvm-dwarfdump/debug-str.yaml index 36729c1823107..0f8cf2f199029 100644 --- a/llvm/test/tools/llvm-dwarfdump/debug-str.yaml +++ b/llvm/test/tools/llvm-dwarfdump/debug-str.yaml @@ -44,3 +44,16 @@ Sections: # ESCAPED-NEXT: 0x00000002: "\001" # ESCAPED-NEXT: 0x00000004: "\\001" # ESCAPED-EMPTY: + +## c) Test that llvm-dwarfdump emits a warning when it encounters a string without a null terminator. + +## "abc\0" "abc" +# RUN: yaml2obj -DCONTENT="61626300616263" %s -o %t3.o +# RUN: llvm-dwarfdump --debug-str %t3.o 2>&1 | FileCheck %s --check-prefix=WARN + +# WARN: .debug_str contents: +# WARN-NEXT: 0x00000000: "abc" +# WARN-NEXT: warning: no null terminated string at offset 0x4 +# WARN: .debug_str.dwo contents: +# WARN-NEXT: 0x00000000: "abc" +# WARN-NEXT: warning: no null terminated string at offset 0x4 diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml index d516b3704e71c..0315e18795bc0 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml @@ -139,7 +139,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 37000000040020000000010101FB0E0D000101010100000001000001006D61696E2E6370700000000000000502F0BF00000105020A9F0206000101 - sectname: __debug_pubnames segname: __DWARF addr: 0x000000000000D03B @@ -152,7 +151,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1700000002000000000077000000260000006D61696E0000000000 - sectname: __debug_pubtypes segname: __DWARF addr: 0x000000000000D056 @@ -165,7 +163,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
1F0000000200000000007700000059000000696E74006F000000636861720000000000 - sectname: __debug_aranges segname: __DWARF addr: 0x000000000000D079 @@ -178,7 +175,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1C000000020000000000040000000000F0BF0000100000000000000000000000 - sectname: __debug_info segname: __DWARF addr: 0x000000000000D099 @@ -191,7 +187,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 73000000040000000000040101000000040031000000000000003A000000F0BF00001000000002F0BF00001000000001573F0000000101590000000103027D044400000001015900000003027D004900000001016000000000044E00000005040565000000056A000000066F0000000452000000060100 - sectname: __debug_abbrev segname: __DWARF addr: 0x000000000000D110 @@ -204,7 +199,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101250E1305030E10171B0EB44219110112060000022E01110112064018030E3A0B3B0B49103F19E37F0C00000305000218030E3A0B3B0B49100000042400030E3E0B0B0B0000050F00491000000626004910000000 - sectname: __debug_str segname: __DWARF addr: 0x000000000000D167 @@ -217,7 +211,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 004170706C6520636C616E672076657273696F6E2031312E302E302028636C616E672D313130302E302E33332E313729006D61696E2E637070002F746D70006D61696E0061726763006172677600696E74006368617200 - sectname: __apple_names segname: __DWARF addr: 0x000000000000D1BE @@ -614,7 +607,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 3B000000040020000000010101FB0E0D000101010100000001000001006D61696E2E63707000000000000009029C7F0000010000000105020AF3020C000101 - sectname: __debug_pubnames segname: __DWARF addr: 0x000000010000903F @@ -627,7 +619,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 170000000200000000007E0000002A0000006D61696E0000000000 - sectname: __debug_pubtypes segname: __DWARF addr: 
0x000000010000905A @@ -640,7 +631,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1F0000000200000000007E00000060000000696E740076000000636861720000000000 - sectname: __debug_aranges segname: __DWARF addr: 0x000000010000907D @@ -653,7 +643,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 2C0000000200000000000800000000009C7F0000010000001C0000000000000000000000000000000000000000000000 - sectname: __debug_info segname: __DWARF addr: 0x00000001000090AD @@ -666,7 +655,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 7A000000040000000000080101000000040031000000000000003A0000009C7F0000010000001C000000029C7F0000010000001C000000016F3F0000000101600000000302910844000000010160000000030291004900000001016700000000044E0000000504056C000000057100000006760000000452000000060100 - sectname: __debug_abbrev segname: __DWARF addr: 0x000000010000912B @@ -679,7 +667,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101250E1305030E10171B0EB44219110112060000022E01110112064018030E3A0B3B0B49103F1900000305000218030E3A0B3B0B49100000042400030E3E0B0B0B0000050F00491000000626004910000000 - sectname: __debug_str segname: __DWARF addr: 0x000000010000917F @@ -692,7 +679,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 004170706C6520636C616E672076657273696F6E2031312E302E302028636C616E672D313130302E302E33332E313729006D61696E2E637070002F746D70006D61696E0061726763006172677600696E74006368617200 - sectname: __apple_names segname: __DWARF addr: 0x00000001000091D6 diff --git a/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml index a1d3a50c6c82f..8a358ea0700c2 100644 --- a/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml @@ -215,7 +215,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 
reserved3: 0x00000000 - content: 66000000040020000000010101FB0E0D000101010100000001000001006D61696E2E6370700000000000000902700F00000100000001050C0A75050B063C05033C0204000101000902900F00000100000015050E0A083D050C63050B063C0506063F05023D0202000101 - sectname: __debug_pubnames segname: __DWARF addr: 0x000000010000206A @@ -228,7 +227,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 2B000000020000000000E00000002A0000005F5A33666F6F69002A000000666F6F00690000006D61696E0000000000 - sectname: __debug_pubtypes segname: __DWARF addr: 0x0000000100002099 @@ -241,7 +239,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1F000000020000000000E000000062000000696E7400D8000000636861720000000000 - sectname: __debug_aranges segname: __DWARF addr: 0x00000001000020BC @@ -254,7 +251,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 3C000000020000000000080000000000700F0000010000001100000000000000900F000001000000250000000000000000000000000000000000000000000000 - sectname: __debug_info segname: __DWARF addr: 0x00000001000020FC @@ -267,7 +263,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: DC000000040000000000080101000000040031000000000000003A000000700F0000010000004500000002700F000001000000110000000156460000000302917C5600000000043F0000004700000001016200000001054B00000001016200000000064D000000050407900F000001000000250000000156510000000104620000000802917456000000010462000000080291685B0000000104C9000000090291644B0000000105620000000A46000000AA0F0000010000000600000001050302917C5600000000000BCE0000000BD30000000CD80000000660000000060100 - sectname: __debug_abbrev segname: __DWARF addr: 0x00000001000021DC @@ -280,7 +275,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
011101250E1305030E10171B0EB44219110112060000022E0111011206401831100000030500021831100000042E016E0E030E3A0B3B0B49103F19200B0000050500030E3A0B3B0B49100000062400030E3E0B0B0B0000072E01110112064018030E3A0B3B0B49103F1900000805000218030E3A0B3B0B491000000934000218030E3A0B3B0B491000000A1D01311011011206580B590B00000B0F00491000000C26004910000000 - sectname: __debug_str segname: __DWARF addr: 0x0000000100002284 @@ -293,7 +287,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 004170706C6520636C616E672076657273696F6E2031312E302E302028636C616E672D313130302E302E33332E313729006D61696E2E637070002F746D70005F5A33666F6F6900666F6F006900696E74006D61696E00617267630061726776006368617200 - sectname: __apple_names segname: __DWARF addr: 0x00000001000022E9 diff --git a/llvm/test/tools/llvm-ml/line_continuations.test b/llvm/test/tools/llvm-ml/line_continuations.test new file mode 100644 index 0000000000000..604bbe91b32af --- /dev/null +++ b/llvm/test/tools/llvm-ml/line_continuations.test @@ -0,0 +1,17 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.code + +t1: +mov eax, \ + ebx +# CHECK: t1: +# CHECK-NEXT: mov eax, ebx + +t2: +mov eax, [ebx + \ + 1] +# CHECK: t2: +# CHECK-NEXT: mov eax, dword ptr [ebx + 1] + +END diff --git a/llvm/test/tools/llvm-ml/named_bitwise_operators.test b/llvm/test/tools/llvm-ml/named_bitwise_operators.test new file mode 100644 index 0000000000000..f122dbe842d0f --- /dev/null +++ b/llvm/test/tools/llvm-ml/named_bitwise_operators.test @@ -0,0 +1,20 @@ +; RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +t1 BYTE NOT 1 +; CHECK: t1: +; CHECK-NEXT: .byte -2 + +t2 BYTE 1 OR 2 +; CHECK: t2: +; CHECK-NEXT: .byte 3 + +t3 BYTE 6 AND 10 +; CHECK: t3: +; CHECK-NEXT: .byte 2 + +.code +xor eax, eax + +END diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test index fa85ecd455dda..38fc763fc7e1f 100644 --- a/llvm/test/tools/llvm-ml/struct.test +++ b/llvm/test/tools/llvm-ml/struct.test @@ 
-34,11 +34,9 @@ t1 foobar <> ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 2 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 6 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "abcde", plus alignment padding ; CHECK-NEXT: .byte 97 @@ -65,11 +63,9 @@ t2 FOOBAR <"gh",,<10,11>,<12>,"ijk"> ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .byte 11 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 12 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "ijk", padded with " ", plus alignment padding ; CHECK-NEXT: .byte 105 @@ -87,16 +83,16 @@ mov eax, [t2].f.h mov eax, [t2.f.h] ; CHECK: t3: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t4: mov eax, j.FOOBAR.f.h mov eax, j.baz.b ; CHECK: t4: -; CHECK-NEXT: mov eax, dword ptr [rip + j+12] +; CHECK-NEXT: mov eax, dword ptr [rip + j+11] ; CHECK-NEXT: mov eax, dword ptr [rip + j+1] t5: @@ -105,9 +101,9 @@ mov eax, [ebx.FOOBAR].f.h mov eax, [ebx.FOOBAR.f.h] ; CHECK: t5: -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] t6: mov eax, t2.FOOBAR.f.h @@ -116,10 +112,10 @@ mov eax, [t2.FOOBAR].f.h mov eax, [t2.FOOBAR.f.h] ; CHECK: t6: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + 
t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t7: mov eax, [ebx].FOOBAR.e.b @@ -185,7 +181,7 @@ mov eax, FOOBAR.f.h ; CHECK: t10: ; CHECK-NEXT: mov eax, 10 -; CHECK-NEXT: mov eax, 12 +; CHECK-NEXT: mov eax, 11 t11: mov eax, (FOOBAR PTR [ebx]).f diff --git a/llvm/test/tools/llvm-ml/variable.test b/llvm/test/tools/llvm-ml/variable.test new file mode 100644 index 0000000000000..4e89d67bd59dd --- /dev/null +++ b/llvm/test/tools/llvm-ml/variable.test @@ -0,0 +1,13 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data +t1_value equ 1 or 2 + +t1 BYTE t1_value DUP (0) +; CHECK: t1: +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NOT: .byte 0 + +END diff --git a/llvm/test/tools/llvm-objdump/MachO/section-contents.test b/llvm/test/tools/llvm-objdump/MachO/section-contents.test index cd68e1fa550b4..d938e903fd079 100644 --- a/llvm/test/tools/llvm-objdump/MachO/section-contents.test +++ b/llvm/test/tools/llvm-objdump/MachO/section-contents.test @@ -1,16 +1,16 @@ RUN: llvm-objdump --macho -s %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s -CHECK: Contents of section __text: +CHECK: Contents of section __TEXT,__text: CHECK: 0000 554889e5 4883ec20 488d0500 000000c7 UH..H.. H....... CHECK: 0010 45fc0000 0000897d f8488975 f0488955 E......}.H.u.H.U CHECK: 0020 e84889c7 b000e800 000000b9 00000000 .H.............. CHECK: 0030 8945e489 c84883c4 205dc3 .E...H.. ]. -CHECK: Contents of section __cstring: +CHECK: Contents of section __TEXT,__cstring: CHECK: 003b 48656c6c 6f20776f 726c640a 00 Hello world.. -CHECK: Contents of section __compact_unwind: +CHECK: Contents of section __LD,__compact_unwind: CHECK: 0048 00000000 00000000 3b000000 00000001 ........;....... CHECK: 0058 00000000 00000000 00000000 00000000 ................ -CHECK: Contents of section __eh_frame: +CHECK: Contents of section __TEXT,__eh_frame: CHECK: 0068 14000000 00000000 017a5200 01781001 .........zR..x.. 
CHECK: 0078 100c0708 90010000 24000000 1c000000 ........$....... CHECK: 0088 78ffffff ffffffff 3b000000 00000000 x.......;....... diff --git a/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test b/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test index aefc0b0f0e16d..59cf7155e2eb5 100644 --- a/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test +++ b/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test @@ -6,28 +6,28 @@ # LLVM: Relocations [ # LLVM-NEXT: Section (3) .rel.text { -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 2 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 2 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file # LLVM-NEXT: 0x2 R_X86_64_NONE - 0x0 -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 4 in section 3: invalid section index: 255 -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 5 in section 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 4 in SHT_REL section with index 3: invalid section index: 255 +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 5 in SHT_REL section with index 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table # LLVM-NEXT: } # LLVM-NEXT: Section (4) .rela.text { -# LLVM-NEXT: warning: '[[FILE]]': unable to print 
relocation 1 in section 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_RELA section with index 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM # LLVM-NEXT: } # LLVM-NEXT: ] # GNU: Relocation section '.rel.text' at offset 0x41 contains 5 entries: # GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 2 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 2 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file # GNU-NEXT: 0000000000000002 0000000000000000 R_X86_64_NONE -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 4 in section 3: invalid section index: 255 -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 5 in section 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 4 in SHT_REL section with index 3: invalid section index: 255 +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 5 in SHT_REL section with index 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table # GNU-EMPTY: # GNU-NEXT: Relocation section '.rela.text' at offset 0x91 contains 1 entries: # GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -# GNU-NEXT: 
warning: '[[FILE]]': unable to print relocation 1 in section 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_RELA section with index 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM --- !ELF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test b/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test index 0db833de45ae3..c0fa2c8029347 100644 --- a/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test +++ b/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test @@ -186,7 +186,7 @@ Symbols: # SHORT-GNU: Stack Sizes: # SHORT-GNU-NEXT: Size Function # SHORT-GNU-NEXT: 8 foo -# SHORT-GNU-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into section .stack_sizes while trying to extract a stack size entry +# SHORT-GNU-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into SHT_PROGBITS section with index 2 while trying to extract a stack size entry # SHORT-GNU-NEXT: 8 foo # SHORT-LLVM: StackSizes [ @@ -194,7 +194,7 @@ Symbols: # SHORT-LLVM-NEXT: Function: foo # SHORT-LLVM-NEXT: Size: 0x8 # SHORT-LLVM-NEXT: } -# SHORT-LLVM-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into section .stack_sizes while trying to extract a stack size entry +# SHORT-LLVM-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into SHT_PROGBITS section with index 2 while trying to extract a stack size entry # SHORT-LLVM-NEXT: Entry { # SHORT-LLVM-NEXT: Function: foo # SHORT-LLVM-NEXT: Size: 0x8 @@ -361,9 +361,8 @@ Symbols: # RUN: llvm-readelf --stack-sizes %t06 2>&1 | FileCheck %s --check-prefix=BADSIZE -DFILE=%t06 # RUN: llvm-readobj --stack-sizes %t06 2>&1 | FileCheck %s --check-prefix=BADSIZE -DFILE=%t06 -## TODO: these messages should be improved to include section indices. 
-# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in section .stack_sizes -# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in section .stack_sizes +# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in SHT_PROGBITS section with index 2 +# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in SHT_PROGBITS section with index 3 --- !ELF FileHeader: @@ -460,7 +459,7 @@ Symbols: # NORELOCSECTION-OUT-LLVM: StackSizes [ # NORELOCSECTION-OUT-LLVM-NEXT: ] -# NORELOCSECTION-ERR: warning: '[[FILE]]': section .stack_sizes does not have a corresponding relocation section +# NORELOCSECTION-ERR: warning: '[[FILE]]': .stack_sizes (SHT_PROGBITS section with index 2) does not have a corresponding relocation section --- !ELF FileHeader: diff --git a/llvm/test/tools/llvm-symbolizer/use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s new file mode 100644 index 0000000000000..aed7d43d33916 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s @@ -0,0 +1,7 @@ +# REQUIRES: x86-registered-target + +# RUN: llvm-mc -filetype=obj -triple=x86_64 -g %s -o %t.o + +## --use-symbol-table=true is used by old asan_symbolize.py and Android ndk +## ndk-stack.py. Keep it as a no-op compatibility option for a while. +# RUN: llvm-symbolizer --use-symbol-table=true %t.o diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml new file mode 100644 index 0000000000000..e058642877243 --- /dev/null +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml @@ -0,0 +1,101 @@ +## Test how we dump the .debug_str section. + +## a) Test dumping a .debug_str section with a default section header. + +# RUN: yaml2obj --docnum=1 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC --implicit-check-not='Name: .debug_str' + +## b) Test dumping a .debug_str section whose section header properties are overridden. + +## Override the sh_type field. 
+# RUN: yaml2obj --docnum=1 -DTYPE=STRTAB %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON \ +# RUN: -DTYPE=STRTAB -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 + +## Override the sh_flags field. +# RUN: yaml2obj --docnum=1 -DFLAGS=[SHF_ALLOC] %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_ALLOC ]" -D#%x,ADDRALIGN=1 + +## Override the sh_link field. +# RUN: yaml2obj --docnum=1 -DLINK=.sec %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,LINK \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -DLINK=.sec -D#%x,ADDRALIGN=1 + +## Override the sh_addr field. +# RUN: yaml2obj --docnum=1 -DADDRESS=0x2020 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,ADDRESS \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 -D#%x,ADDRESS=0x2020 + +## Override the sh_addralign field. +# RUN: yaml2obj --docnum=1 -DADDRALIGN=3 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=3 + +## Override the sh_entsize field (sh_entsize=3). +# RUN: yaml2obj --docnum=1 -DENTSIZE=3 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,ENTSIZE \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 -D#%x,ENTSIZE=3 + +## Override the sh_entsize field (sh_entsize=0). +# RUN: yaml2obj --docnum=1 -DENTSIZE=0 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,ENTSIZE \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 -D#%x,ENTSIZE=0 + +## Override the sh_info field. 
+# RUN: yaml2obj --docnum=1 -DINFO=3 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,INFO \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,INFO=3 -D#%x,ADDRALIGN=1 -D#%x,ENTSIZE=1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_str + Type: SHT_[[TYPE=PROGBITS]] + Flags: [[FLAGS=]] + Link: [[LINK='']] + EntSize: [[ENTSIZE=1]] + Info: [[INFO=]] + AddressAlign: [[ADDRALIGN=1]] + Address: [[ADDRESS=]] + - Name: .sec + Type: SHT_PROGBITS +DWARF: + debug_str: + - a + - b + - abc + +# COMMON: - Name: .debug_str +# COMMON-NEXT: Type: SHT_[[TYPE]] +# COMMON-NEXT: Flags: [[FLAGS]] +# LINK-NEXT: Link: .sec +# ADDRESS-NEXT: Address: 0x[[#%.16x,ADDRESS]] +# COMMON-NEXT: AddressAlign: 0x[[#%.16x,ADDRALIGN]] +# ENTSIZE-NEXT: EntSize: 0x[[#%.16x,ENTSIZE]] +# INFO-NEXT: Info: 0x[[#%.16x,INFO]] +# BASIC: DWARF: +# BASIC-NEXT: debug_str: +# BASIC-NEXT: - a +# BASIC-NEXT: - b +# BASIC-NEXT: - abc +# BASIC-NEXT: ... + +## c) Test dumping an empty .debug_str section. + +# RUN: yaml2obj --docnum=2 %s | obj2yaml | FileCheck %s --check-prefix=EMPTY --implicit-check-not=Sections + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_str: [] +# EMPTY-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_str: [] diff --git a/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml b/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml index 161805b30834a..4583fdde27fb0 100644 --- a/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml +++ b/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml @@ -1,16 +1,11 @@ ## Test that macho2yaml dumps the __debug_foo section (unrecognized debug section) ## as a raw content section. -## Due to the current implementation of yaml2macho being buggy, we cannot generate a DWARF section -## where the sectname starts with '__debug_' and the segname is '__DWARF', from a raw content section. 
-## We've slightly modified the segname to be '__FOO'. macho2yaml will still treat it as a debug -## section. - # RUN: yaml2obj %s | obj2yaml | FileCheck %s --check-prefix=UNRECOGNIZED # UNRECOGNIZED: Sections: # UNRECOGNIZED-NEXT: - sectname: __debug_foo -# UNRECOGNIZED-NEXT: segname: __FOO +# UNRECOGNIZED-NEXT: segname: __DWARF # UNRECOGNIZED-NEXT: addr: 0x0000000000000000 # UNRECOGNIZED-NEXT: size: 5 # UNRECOGNIZED-NEXT: offset: 0x00000210 @@ -48,7 +43,7 @@ LoadCommands: flags: 0 Sections: - sectname: __debug_foo - segname: __FOO + segname: __DWARF addr: 0x00 size: 5 offset: 528 diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml index 52841e167b447..6a8dc84d98aa7 100644 --- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml @@ -631,3 +631,18 @@ DWARF: [[SIZENAME]]: 3 Entries: - Address: 0x1234 + +## n) Test that the .debug_addr section header is emitted if the "debug_addr" +## entry is empty. 
+ +# RUN: yaml2obj --docnum=12 %s -o %t12.o +# RUN: llvm-readobj --sections %t12.o | \ +# RUN: FileCheck %s -DSIZE=0 -DADDRALIGN=1 --check-prefix=SHDR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_addr: [] diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index f1cc1f2550b31..a848bf029dbf0 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -17,6 +17,7 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" @@ -30,7 +31,6 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" @@ -802,19 +802,13 @@ Error sanitizeArguments(const Session &S) { } Error loadProcessSymbols(Session &S) { - std::string ErrMsg; - if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, &ErrMsg)) - return make_error(std::move(ErrMsg), inconvertibleErrorCode()); - - char GlobalPrefix = - S.TPC->getTargetTriple().getObjectFormat() == Triple::MachO ? 
'_' : '\0'; auto InternedEntryPointName = S.ES.intern(EntryPointName); auto FilterMainEntryPoint = [InternedEntryPointName](SymbolStringPtr Name) { return Name != InternedEntryPointName; }; S.MainJD->addGenerator( - ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( - GlobalPrefix, FilterMainEntryPoint))); + ExitOnErr(orc::TPCDynamicLibrarySearchGenerator::GetForTargetProcess( + *S.TPC, std::move(FilterMainEntryPoint)))); return Error::success(); } diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 2d4a3afdc1f93..5abf22d6d6ddf 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -175,6 +175,7 @@ static std::unique_ptr GetOutputStream(StringRef Path) { static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, raw_ostream &OS) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer()); + Lexer.setLexMasmIntegers(true); bool Error = false; while (Lexer.Lex().isNot(AsmToken::Eof)) { diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 6b3ecd9cef193..b63d08b90ff51 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1619,6 +1619,16 @@ collectLocalBranchTargets(ArrayRef Bytes, const MCInstrAnalysis *MIA, } } +static StringRef getSegmentName(const MachOObjectFile *MachO, + const SectionRef &Section) { + if (MachO) { + DataRefImpl DR = Section.getRawDataRefImpl(); + StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); + return SegmentName; + } + return ""; +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1783,12 +1793,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } } - StringRef SegmentName = ""; - if (MachO) { - DataRefImpl DR = Section.getRawDataRefImpl(); - SegmentName = 
MachO->getSectionFinalSegmentName(DR); - } - + StringRef SegmentName = getSegmentName(MachO, Section); StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName()); // If the section has no symbol at the start, just insert a dummy one. if (Symbols.empty() || Symbols[0].Addr != 0) { @@ -2388,6 +2393,8 @@ void objdump::printSectionHeaders(const ObjectFile *Obj) { } void objdump::printSectionContents(const ObjectFile *Obj) { + const MachOObjectFile *MachO = dyn_cast(Obj); + for (const SectionRef &Section : ToolSectionFilter(*Obj)) { StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t BaseAddr = Section.getAddress(); @@ -2395,7 +2402,11 @@ void objdump::printSectionContents(const ObjectFile *Obj) { if (!Size) continue; - outs() << "Contents of section " << Name << ":\n"; + outs() << "Contents of section "; + StringRef SegmentName = getSegmentName(MachO, Section); + if (!SegmentName.empty()) + outs() << SegmentName << ","; + outs() << Name << ":\n"; if (Section.isBSS()) { outs() << format("\n", @@ -2553,11 +2564,9 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, } else if (Section == O->section_end()) { outs() << "*UND*"; } else { - if (MachO) { - DataRefImpl DR = Section->getRawDataRefImpl(); - StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); + StringRef SegmentName = getSegmentName(MachO, *Section); + if (!SegmentName.empty()) outs() << SegmentName << ","; - } StringRef SectionName = unwrapOrError(Section->getName(), FileName); outs() << SectionName; } diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp index c1db03a61c9fa..d753185177050 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -167,6 +167,11 @@ const Decoder::RingEntry Decoder::Ring64[] = { { 0xff, 0xe3, 1, &Decoder::opcode_nop }, { 0xff, 0xe4, 1, &Decoder::opcode_end }, { 0xff, 0xe5, 1, &Decoder::opcode_end_c }, + { 
0xff, 0xe6, 1, &Decoder::opcode_save_next }, + { 0xff, 0xe8, 1, &Decoder::opcode_trap_frame }, + { 0xff, 0xe9, 1, &Decoder::opcode_machine_frame }, + { 0xff, 0xea, 1, &Decoder::opcode_context }, + { 0xff, 0xec, 1, &Decoder::opcode_clear_unwound_to_call }, }; void Decoder::printRegisters(const std::pair &RegisterMask) { @@ -776,6 +781,47 @@ bool Decoder::opcode_end_c(const uint8_t *OC, unsigned &Offset, unsigned Length, return true; } +bool Decoder::opcode_save_next(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + if (Prologue) + SW.startLine() << format("0x%02x ; save next\n", OC[Offset]); + else + SW.startLine() << format("0x%02x ; restore next\n", + OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_trap_frame(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; trap frame\n", OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_machine_frame(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; machine frame\n", + OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_context(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; context\n", OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_clear_unwound_to_call(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; clear unwound to call\n", + OC[Offset]); + ++Offset; + return false; +} + void Decoder::decodeOpcodes(ArrayRef Opcodes, unsigned Offset, bool Prologue) { assert((!Prologue || Offset == 0) && "prologue should always use offset 0"); diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h index 5de7062cb1d7b..36fe5d6f4b2b4 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h @@ -120,6 +120,14 @@ class 
Decoder { bool Prologue); bool opcode_save_next(const uint8_t *Opcodes, unsigned &Offset, unsigned Length, bool Prologue); + bool opcode_trap_frame(const uint8_t *Opcodes, unsigned &Offset, + unsigned Length, bool Prologue); + bool opcode_machine_frame(const uint8_t *Opcodes, unsigned &Offset, + unsigned Length, bool Prologue); + bool opcode_context(const uint8_t *Opcodes, unsigned &Offset, unsigned Length, + bool Prologue); + bool opcode_clear_unwound_to_call(const uint8_t *Opcodes, unsigned &Offset, + unsigned Length, bool Prologue); void decodeOpcodes(ArrayRef Opcodes, unsigned Offset, bool Prologue); diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index fa821ff6a619b..df3799c8fbe67 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -114,17 +114,24 @@ namespace { template class DumpStyle; +template struct RelSymbol { + RelSymbol(const typename ELFT::Sym *S, StringRef N) + : Sym(S), Name(N.str()) {} + const typename ELFT::Sym *Sym; + std::string Name; +}; + /// Represents a contiguous uniform range in the file. We cannot just create a /// range directly because when creating one of these from the .dynamic table /// the size, entity size and virtual address are different entries in arbitrary /// order (DT_REL, DT_RELSZ, DT_RELENT for example). struct DynRegionInfo { DynRegionInfo(StringRef ObjName) : FileName(ObjName) {} - DynRegionInfo(const void *A, uint64_t S, uint64_t ES, StringRef ObjName) + DynRegionInfo(const uint8_t *A, uint64_t S, uint64_t ES, StringRef ObjName) : Addr(A), Size(S), EntSize(ES), FileName(ObjName) {} /// Address in current address space. - const void *Addr = nullptr; + const uint8_t *Addr = nullptr; /// Size in bytes of the region. uint64_t Size = 0; /// Size of each entity in the region. 
@@ -198,6 +205,24 @@ struct VerNeed { } // namespace +template class Relocation { +public: + Relocation(const typename ELFT::Rel &R, bool IsMips64EL) + : Type(R.getType(IsMips64EL)), Symbol(R.getSymbol(IsMips64EL)), + Offset(R.r_offset), Info(R.r_info) {} + + Relocation(const typename ELFT::Rela &R, bool IsMips64EL) + : Relocation((const typename ELFT::Rel &)R, IsMips64EL) { + Addend = R.r_addend; + } + + uint32_t Type; + uint32_t Symbol; + typename ELFT::uint Offset; + typename ELFT::uint Info; + Optional Addend; +}; + template class ELFDumper : public ObjDumper { public: ELFDumper(const object::ELFObjectFile *ObjF, ScopedPrinter &Writer); @@ -363,19 +388,18 @@ template class ELFDumper : public ObjDumper { Expected> getVersionDependencies(const Elf_Shdr *Sec) const; - template - Expected> - getRelocationTarget(const Elf_Shdr *SymTab, const RelTy &R) const; + Expected> getRelocationTarget(const Relocation &R, + const Elf_Shdr *SymTab) const; std::function WarningHandler; void reportUniqueWarning(Error Err) const; }; template -static std::string describe(const ELFFile *Obj, +static std::string describe(const ELFFile &Obj, const typename ELFT::Shdr &Sec) { - unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); - return (object::getELFSectionTypeName(Obj->getHeader()->e_machine, + unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front(); + return (object::getELFSectionTypeName(Obj.getHeader()->e_machine, Sec.sh_type) + " section with index " + Twine(SecNdx)) .str(); @@ -383,19 +407,19 @@ static std::string describe(const ELFFile *Obj, template std::string ELFDumper::describe(const Elf_Shdr &Sec) const { - return ::describe(ObjF->getELFFile(), Sec); + return ::describe(*ObjF->getELFFile(), Sec); } template -static Expected getLinkAsStrtab(const ELFFile *Obj, +static Expected getLinkAsStrtab(const ELFFile &Obj, const typename ELFT::Shdr *Sec) { Expected StrTabSecOrErr = - Obj->getSection(Sec->sh_link); + Obj.getSection(Sec->sh_link); if (!StrTabSecOrErr) 
return createError("invalid section linked to " + describe(Obj, *Sec) + ": " + toString(StrTabSecOrErr.takeError())); - Expected StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr); + Expected StrTabOrErr = Obj.getStringTable(*StrTabSecOrErr); if (!StrTabOrErr) return createError("invalid string table linked to " + describe(Obj, *Sec) + ": " + toString(StrTabOrErr.takeError())); @@ -405,22 +429,22 @@ static Expected getLinkAsStrtab(const ELFFile *Obj, // Returns the linked symbol table and associated string table for a given section. template static Expected> -getLinkAsSymtab(const ELFFile *Obj, const typename ELFT::Shdr *Sec, +getLinkAsSymtab(const ELFFile &Obj, const typename ELFT::Shdr *Sec, unsigned ExpectedType) { Expected SymtabOrErr = - Obj->getSection(Sec->sh_link); + Obj.getSection(Sec->sh_link); if (!SymtabOrErr) return createError("invalid section linked to " + describe(Obj, *Sec) + ": " + toString(SymtabOrErr.takeError())); if ((*SymtabOrErr)->sh_type != ExpectedType) - return createError( - "invalid section linked to " + describe(Obj, *Sec) + ": expected " + - object::getELFSectionTypeName(Obj->getHeader()->e_machine, - ExpectedType) + - ", but got " + - object::getELFSectionTypeName(Obj->getHeader()->e_machine, - (*SymtabOrErr)->sh_type)); + return createError("invalid section linked to " + describe(Obj, *Sec) + + ": expected " + + object::getELFSectionTypeName(Obj.getHeader()->e_machine, + ExpectedType) + + ", but got " + + object::getELFSectionTypeName(Obj.getHeader()->e_machine, + (*SymtabOrErr)->sh_type)); Expected StrTabOrErr = getLinkAsStrtab(Obj, *SymtabOrErr); if (!StrTabOrErr) @@ -428,7 +452,7 @@ getLinkAsSymtab(const ELFFile *Obj, const typename ELFT::Shdr *Sec, "can't get a string table for the symbol table linked to " + describe(Obj, *Sec) + ": " + toString(StrTabOrErr.takeError())); - Expected SymsOrErr = Obj->symbols(*SymtabOrErr); + Expected SymsOrErr = Obj.symbols(*SymtabOrErr); if (!SymsOrErr) return createError("unable to read symbols 
from the " + describe(Obj, *Sec) + ": " + @@ -454,7 +478,7 @@ ELFDumper::getVersionTable(const Elf_Shdr *Sec, ArrayRef *SymTab, toString(VersionsOrErr.takeError())); Expected, StringRef>> SymTabOrErr = - getLinkAsSymtab(Obj, Sec, SHT_DYNSYM); + getLinkAsSymtab(*Obj, Sec, SHT_DYNSYM); if (!SymTabOrErr) { reportUniqueWarning(SymTabOrErr.takeError()); return *VersionsOrErr; @@ -478,7 +502,7 @@ Expected> ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { const ELFFile *Obj = ObjF->getELFFile(); - Expected StrTabOrErr = getLinkAsStrtab(Obj, Sec); + Expected StrTabOrErr = getLinkAsStrtab(*Obj, Sec); if (!StrTabOrErr) return StrTabOrErr.takeError(); @@ -565,7 +589,7 @@ Expected> ELFDumper::getVersionDependencies(const Elf_Shdr *Sec) const { const ELFFile *Obj = ObjF->getELFFile(); StringRef StrTab; - Expected StrTabOrErr = getLinkAsStrtab(Obj, Sec); + Expected StrTabOrErr = getLinkAsStrtab(*Obj, Sec); if (!StrTabOrErr) reportUniqueWarning(StrTabOrErr.takeError()); else @@ -681,10 +705,9 @@ void ELFDumper::printSymbolsHelper(bool IsDynamic) const { return S.st_other & ~0x3; }) != Syms.end(); - ELFDumperStyle->printSymtabMessage(Obj, SymtabSec, Entries, - NonVisibilityBitsUsed); + ELFDumperStyle->printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed); for (const auto &Sym : Syms) - ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic, + ELFDumperStyle->printSymbol(&Sym, Syms.begin(), StrTable, IsDynamic, NonVisibilityBitsUsed); } @@ -694,43 +717,37 @@ template class DumpStyle { public: TYPEDEF_ELF_TYPES(ELFT) - DumpStyle(ELFDumper *Dumper) : Dumper(Dumper) { + DumpStyle(ELFDumper *Dumper) + : Obj(*Dumper->getElfObject()->getELFFile()), Dumper(Dumper) { FileName = this->Dumper->getElfObject()->getFileName(); } virtual ~DumpStyle() = default; - virtual void printFileHeaders(const ELFFile *Obj) = 0; - virtual void printGroupSections(const ELFFile *Obj) = 0; - virtual void printRelocations(const ELFFile *Obj) = 0; - virtual void 
printSectionHeaders(const ELFFile *Obj) = 0; - virtual void printSymbols(const ELFFile *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) = 0; - virtual void printHashSymbols(const ELFFile *Obj) {} - virtual void printDependentLibs(const ELFFile *Obj) = 0; - virtual void printDynamic(const ELFFile *Obj) {} - virtual void printDynamicRelocations(const ELFFile *Obj) = 0; - virtual void printSymtabMessage(const ELFFile *Obj, - const Elf_Shdr *Symtab, size_t Offset, + virtual void printFileHeaders() = 0; + virtual void printGroupSections() = 0; + virtual void printRelocations() = 0; + virtual void printSectionHeaders() = 0; + virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) = 0; + virtual void printHashSymbols() {} + virtual void printDependentLibs() = 0; + virtual void printDynamic() {} + virtual void printDynamicRelocations() = 0; + virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, bool NonVisibilityBitsUsed) {} - virtual void printSymbol(const ELFFile *Obj, const Elf_Sym *Symbol, - const Elf_Sym *FirstSym, + virtual void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) = 0; - virtual void printProgramHeaders(const ELFFile *Obj, - bool PrintProgramHeaders, + virtual void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) = 0; - virtual void printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) = 0; - virtual void printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) = 0; - virtual void printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) = 0; - virtual void printHashHistograms(const ELFFile *Obj) = 0; - virtual void printCGProfile(const ELFFile *Obj) = 0; - virtual void printAddrsig(const ELFFile *Obj) = 0; - virtual void printNotes(const ELFFile *Obj) = 0; - virtual void printELFLinkerOptions(const ELFFile *Obj) = 0; + virtual void printVersionSymbolSection(const 
Elf_Shdr *Sec) = 0; + virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0; + virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0; + virtual void printHashHistograms() = 0; + virtual void printCGProfile() = 0; + virtual void printAddrsig() = 0; + virtual void printNotes() = 0; + virtual void printELFLinkerOptions() = 0; virtual void printStackSizes(const ELFObjectFile *Obj) = 0; void printNonRelocatableStackSizes(const ELFObjectFile *Obj, std::function PrintHeader); @@ -738,11 +755,10 @@ template class DumpStyle { std::function PrintHeader); void printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue, Optional FunctionSec, - const StringRef SectionName, DataExtractor Data, + const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset); void printStackSize(const ELFObjectFile *Obj, RelocationRef Rel, - SectionRef FunctionSec, - const StringRef &StackSizeSectionName, + SectionRef FunctionSec, const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data); virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0; virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; @@ -752,24 +768,24 @@ template class DumpStyle { protected: void printDependentLibsHelper( - const ELFFile *Obj, function_ref OnSectionStart, function_ref OnSectionEntry); - virtual void printRelReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rel &R, - unsigned RelIndex) = 0; - virtual void printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) = 0; + virtual void printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) = 0; virtual void printRelrReloc(const Elf_Relr &R) = 0; - void printRelocationsHelper(const ELFFile *Obj, const Elf_Shdr &Sec); + virtual void printDynamicReloc(const Relocation &R) = 0; + void printRelocationsHelper(const Elf_Shdr &Sec); + void 
printDynamicRelocationsHelper(); + virtual void printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg){}; - StringRef getPrintableSectionName(const ELFFile *Obj, - const Elf_Shdr &Sec) const; + StringRef getPrintableSectionName(const Elf_Shdr &Sec) const; void reportUniqueWarning(Error Err) const; + StringRef FileName; + const ELFFile &Obj; private: const ELFDumper *Dumper; @@ -787,31 +803,27 @@ template class GNUStyle : public DumpStyle { assert (&W.getOStream() == &llvm::fouts()); } - void printFileHeaders(const ELFO *Obj) override; - void printGroupSections(const ELFFile *Obj) override; - void printRelocations(const ELFO *Obj) override; - void printSectionHeaders(const ELFO *Obj) override; - void printSymbols(const ELFO *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) override; - void printHashSymbols(const ELFO *Obj) override; - void printDependentLibs(const ELFFile *Obj) override; - void printDynamic(const ELFFile *Obj) override; - void printDynamicRelocations(const ELFO *Obj) override; - void printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, - size_t Offset, bool NonVisibilityBitsUsed) override; - void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, + void printFileHeaders() override; + void printGroupSections() override; + void printRelocations() override; + void printSectionHeaders() override; + void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override; + void printHashSymbols() override; + void printDependentLibs() override; + void printDynamic() override; + void printDynamicRelocations() override; + void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, + bool NonVisibilityBitsUsed) override; + void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; - void printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void 
printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printHashHistograms(const ELFFile *Obj) override; - void printCGProfile(const ELFFile *Obj) override; - void printAddrsig(const ELFFile *Obj) override; - void printNotes(const ELFFile *Obj) override; - void printELFLinkerOptions(const ELFFile *Obj) override; + void printVersionSymbolSection(const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const Elf_Shdr *Sec) override; + void printVersionDependencySection(const Elf_Shdr *Sec) override; + void printHashHistograms() override; + void printCGProfile() override; + void printAddrsig() override; + void printNotes() override; + void printELFLinkerOptions() override; void printStackSizes(const ELFObjectFile *Obj) override; void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; @@ -822,9 +834,8 @@ template class GNUStyle : public DumpStyle { void printHashHistogram(const Elf_Hash &HashTable); void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable); - void printHashTableSymbols(const ELFO *Obj, const Elf_Hash &HashTable); - void printGnuHashTableSymbols(const ELFO *Obj, - const Elf_GnuHash &GnuHashTable); + void printHashTableSymbols(const Elf_Hash &HashTable); + void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable); struct Field { std::string Str; @@ -876,35 +887,26 @@ template class GNUStyle : public DumpStyle { OS.flush(); return OS; } - void printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, uint32_t Sym, + void printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, StringRef StrTable, uint32_t Bucket); - void printRelocHeader(unsigned SType); - - void printRelReloc(const ELFO *Obj, unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) override; - void printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) override; + void 
printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - template - void printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const RelTy &R, - unsigned RelIndex); - template - void printRelRelaReloc(const ELFO *Obj, const Elf_Sym *Sym, - StringRef SymbolName, const RelTy &R); - void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, + void printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym); + void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) override; - std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol, + void printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg) override; + void printDynamicReloc(const Relocation &R) override; + + std::string getSymbolSectionNdx(const Elf_Sym *Symbol, const Elf_Sym *FirstSym); - template - void printDynamicRelocation(const ELFO *Obj, const RelTy &R); - void printProgramHeaders(const ELFO *Obj); - void printSectionMapping(const ELFO *Obj); - void printGNUVersionSectionProlog(const ELFFile *Obj, - const typename ELFT::Shdr *Sec, + void printProgramHeaders(); + void printSectionMapping(); + void printGNUVersionSectionProlog(const typename ELFT::Shdr *Sec, const Twine &Label, unsigned EntriesNum); }; @@ -928,28 +930,24 @@ template class LLVMStyle : public DumpStyle { LLVMStyle(ScopedPrinter &W, ELFDumper *Dumper) : DumpStyle(Dumper), W(W) {} - void printFileHeaders(const ELFO *Obj) override; - void printGroupSections(const ELFFile *Obj) override; - void printRelocations(const ELFO *Obj) override; - void printSectionHeaders(const ELFO *Obj) override; - void printSymbols(const ELFO *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) override; - void printDependentLibs(const ELFFile *Obj) override; - void printDynamic(const ELFFile *Obj) override; - 
void printDynamicRelocations(const ELFO *Obj) override; - void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, + void printFileHeaders() override; + void printGroupSections() override; + void printRelocations() override; + void printSectionHeaders() override; + void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override; + void printDependentLibs() override; + void printDynamic() override; + void printDynamicRelocations() override; + void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; - void printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printHashHistograms(const ELFFile *Obj) override; - void printCGProfile(const ELFFile *Obj) override; - void printAddrsig(const ELFFile *Obj) override; - void printNotes(const ELFFile *Obj) override; - void printELFLinkerOptions(const ELFFile *Obj) override; + void printVersionSymbolSection(const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const Elf_Shdr *Sec) override; + void printVersionDependencySection(const Elf_Shdr *Sec) override; + void printHashHistograms() override; + void printCGProfile() override; + void printAddrsig() override; + void printNotes() override; + void printELFLinkerOptions() override; void printStackSizes(const ELFObjectFile *Obj) override; void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; @@ -957,26 +955,20 @@ template class LLVMStyle : public DumpStyle { void printMipsABIFlags(const ELFObjectFile *Obj) override; private: - void printRelReloc(const ELFO *Obj, unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) override; - void printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr 
*SymTab, const Elf_Rela &R, - unsigned RelIndex) override; + void printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - template - void printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, const RelTy &Rel, - unsigned RelIndex, const Elf_Shdr *SymTab); - template - void printDynamicRelocation(const ELFO *Obj, const RelTy& Rel); - - void printSymbols(const ELFO *Obj); - void printDynamicSymbols(const ELFO *Obj); + void printDynamicReloc(const Relocation &R) override; + + void printRelRelaReloc(const Relocation &R, StringRef SymbolName); + void printSymbols(); + void printDynamicSymbols(); void printSymbolSection(const Elf_Sym *Symbol, const Elf_Sym *First); - void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, + void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/) override; - void printProgramHeaders(const ELFO *Obj); - void printSectionMapping(const ELFO *Obj) {} + void printProgramHeaders(); + void printSectionMapping() {} ScopedPrinter &W; }; @@ -1079,42 +1071,45 @@ Expected ELFDumper::getSymbolVersion(const Elf_Sym *Sym, } template -template -Expected> -ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, - const RelTy &R) const { - const ELFFile *Obj = ObjF->getELFFile(); - Expected SymOrErr = Obj->getRelocationSymbol(&R, SymTab); +Expected> +ELFDumper::getRelocationTarget(const Relocation &R, + const Elf_Shdr *SymTab) const { + if (R.Symbol == 0) + return RelSymbol(nullptr, ""); + + const ELFFile &Obj = *ObjF->getELFFile(); + Expected SymOrErr = + Obj.template getEntry(SymTab, R.Symbol); if (!SymOrErr) return SymOrErr.takeError(); const Elf_Sym *Sym = *SymOrErr; if (!Sym) - return std::make_pair(nullptr, ""); + return RelSymbol(nullptr, ""); // The st_name field of a STT_SECTION is usually 0 (empty string). // This code block returns the section name. 
if (Sym->getType() == ELF::STT_SECTION) { Expected SecOrErr = - Obj->getSection(Sym, SymTab, ShndxTable); + Obj.getSection(Sym, SymTab, ShndxTable); if (!SecOrErr) return SecOrErr.takeError(); // A section symbol describes the section at index 0. if (*SecOrErr == nullptr) - return std::make_pair(Sym, ""); + return RelSymbol(Sym, ""); - Expected NameOrErr = Obj->getSectionName(*SecOrErr); + Expected NameOrErr = Obj.getSectionName(*SecOrErr); if (!NameOrErr) return NameOrErr.takeError(); - return std::make_pair(Sym, NameOrErr->str()); + return RelSymbol(Sym, NameOrErr->str()); } - Expected StrTableOrErr = Obj->getStringTableForSymtab(*SymTab); + Expected StrTableOrErr = Obj.getStringTableForSymtab(*SymTab); if (!StrTableOrErr) return StrTableOrErr.takeError(); std::string SymbolName = getFullSymbolName(Sym, *StrTableOrErr, SymTab->sh_type == SHT_DYNSYM); - return std::make_pair(Sym, SymbolName); + return RelSymbol(Sym, SymbolName); } static std::string maybeDemangle(StringRef Name) { @@ -2273,71 +2268,66 @@ typename ELFDumper::Elf_Relr_Range ELFDumper::dyn_relrs() const { } template void ELFDumper::printFileHeaders() { - ELFDumperStyle->printFileHeaders(ObjF->getELFFile()); + ELFDumperStyle->printFileHeaders(); } template void ELFDumper::printSectionHeaders() { - ELFDumperStyle->printSectionHeaders(ObjF->getELFFile()); + ELFDumperStyle->printSectionHeaders(); } template void ELFDumper::printRelocations() { - ELFDumperStyle->printRelocations(ObjF->getELFFile()); + ELFDumperStyle->printRelocations(); } template void ELFDumper::printProgramHeaders( bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { - ELFDumperStyle->printProgramHeaders(ObjF->getELFFile(), PrintProgramHeaders, - PrintSectionMapping); + ELFDumperStyle->printProgramHeaders(PrintProgramHeaders, PrintSectionMapping); } template void ELFDumper::printVersionInfo() { // Dump version symbol section. 
- ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(), - SymbolVersionSection); + ELFDumperStyle->printVersionSymbolSection(SymbolVersionSection); // Dump version definition section. - ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(), - SymbolVersionDefSection); + ELFDumperStyle->printVersionDefinitionSection(SymbolVersionDefSection); // Dump version dependency section. - ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(), - SymbolVersionNeedSection); + ELFDumperStyle->printVersionDependencySection(SymbolVersionNeedSection); } template void ELFDumper::printDependentLibs() { - ELFDumperStyle->printDependentLibs(ObjF->getELFFile()); + ELFDumperStyle->printDependentLibs(); } template void ELFDumper::printDynamicRelocations() { - ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile()); + ELFDumperStyle->printDynamicRelocations(); } template void ELFDumper::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { - ELFDumperStyle->printSymbols(ObjF->getELFFile(), PrintSymbols, - PrintDynamicSymbols); + ELFDumperStyle->printSymbols(PrintSymbols, PrintDynamicSymbols); } template void ELFDumper::printHashSymbols() { - ELFDumperStyle->printHashSymbols(ObjF->getELFFile()); + ELFDumperStyle->printHashSymbols(); } template void ELFDumper::printHashHistograms() { - ELFDumperStyle->printHashHistograms(ObjF->getELFFile()); + ELFDumperStyle->printHashHistograms(); } template void ELFDumper::printCGProfile() { - ELFDumperStyle->printCGProfile(ObjF->getELFFile()); + ELFDumperStyle->printCGProfile(); } template void ELFDumper::printNotes() { - ELFDumperStyle->printNotes(ObjF->getELFFile()); + ELFDumperStyle->printNotes(); } template void ELFDumper::printELFLinkerOptions() { - ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile()); + ELFDumperStyle->printELFLinkerOptions(); } template void ELFDumper::printStackSizes() { @@ -2673,7 +2663,7 @@ template <> void ELFDumper::printUnwindInfo() { } // end anonymous namespace template void 
ELFDumper::printDynamicTable() { - ELFDumperStyle->printDynamic(ObjF->getELFFile()); + ELFDumperStyle->printDynamic(); } template void ELFDumper::printNeededLibraries() { @@ -2691,26 +2681,26 @@ template void ELFDumper::printNeededLibraries() { } template -static Error checkHashTable(const ELFFile *Obj, +static Error checkHashTable(const ELFFile &Obj, const typename ELFT::Hash *H, bool *IsHeaderValid = nullptr) { auto MakeError = [&](uint64_t Off, const Twine &Msg = "") { return createError("the hash table at offset 0x" + Twine::utohexstr(Off) + " goes past the end of the file (0x" + - Twine::utohexstr(Obj->getBufSize()) + ")" + Msg); + Twine::utohexstr(Obj.getBufSize()) + ")" + Msg); }; // Each SHT_HASH section starts from two 32-bit fields: nbucket and nchain. const unsigned HeaderSize = 2 * sizeof(typename ELFT::Word); - const uint64_t SecOffset = (const uint8_t *)H - Obj->base(); + const uint64_t SecOffset = (const uint8_t *)H - Obj.base(); if (IsHeaderValid) - *IsHeaderValid = Obj->getBufSize() - SecOffset >= HeaderSize; + *IsHeaderValid = Obj.getBufSize() - SecOffset >= HeaderSize; - if (Obj->getBufSize() - SecOffset < HeaderSize) + if (Obj.getBufSize() - SecOffset < HeaderSize) return MakeError(SecOffset); - if (Obj->getBufSize() - SecOffset - HeaderSize < + if (Obj.getBufSize() - SecOffset - HeaderSize < ((uint64_t)H->nbucket + H->nchain) * sizeof(typename ELFT::Word)) return MakeError(SecOffset, ", nbucket = " + Twine(H->nbucket) + ", nchain = " + Twine(H->nchain)); @@ -2718,20 +2708,19 @@ static Error checkHashTable(const ELFFile *Obj, } template -static Error checkGNUHashTable(const ELFFile *Obj, +static Error checkGNUHashTable(const ELFFile &Obj, const typename ELFT::GnuHash *GnuHashTable, bool *IsHeaderValid = nullptr) { const uint8_t *TableData = reinterpret_cast(GnuHashTable); - assert(TableData >= Obj->base() && - TableData < Obj->base() + Obj->getBufSize() && + assert(TableData >= Obj.base() && TableData < Obj.base() + Obj.getBufSize() && 
"GnuHashTable must always point to a location inside the file"); - uint64_t TableOffset = TableData - Obj->base(); + uint64_t TableOffset = TableData - Obj.base(); if (IsHeaderValid) - *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj->getBufSize(); + *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj.getBufSize(); if (TableOffset + 16 + (uint64_t)GnuHashTable->nbuckets * 4 + (uint64_t)GnuHashTable->maskwords * sizeof(typename ELFT::Off) >= - Obj->getBufSize()) + Obj.getBufSize()) return createError("unable to dump the SHT_GNU_HASH " "section at 0x" + Twine::utohexstr(TableOffset) + @@ -2745,7 +2734,7 @@ template void ELFDumper::printHashTable() { return; bool IsHeaderValid; - Error Err = checkHashTable(ObjF->getELFFile(), HashTable, &IsHeaderValid); + Error Err = checkHashTable(*ObjF->getELFFile(), HashTable, &IsHeaderValid); if (IsHeaderValid) { W.printNumber("Num Buckets", HashTable->nbucket); W.printNumber("Num Chains", HashTable->nchain); @@ -2801,8 +2790,8 @@ void ELFDumper::printGnuHashTable(const object::ObjectFile *Obj) { return; bool IsHeaderValid; - Error Err = - checkGNUHashTable(ObjF->getELFFile(), GnuHashTable, &IsHeaderValid); + Error Err = checkGNUHashTable(*ObjF->getELFFile(), GnuHashTable, + &IsHeaderValid); if (IsHeaderValid) { W.printNumber("Num Buckets", GnuHashTable->nbuckets); W.printNumber("First Hashed Symbol Index", GnuHashTable->symndx); @@ -3093,7 +3082,7 @@ Error MipsGOTParser::findPLT(Elf_Dyn_Range DynTable) { PltSymTable = *PltSymTableOrErr; else return createError("unable to get a symbol table linked to the " + - describe(Obj, *PltRelSec) + ": " + + describe(*Obj, *PltRelSec) + ": " + toString(PltSymTableOrErr.takeError())); if (Expected StrTabOrErr = @@ -3101,7 +3090,7 @@ Error MipsGOTParser::findPLT(Elf_Dyn_Range DynTable) { PltStrTable = *StrTabOrErr; else return createError("unable to get a string table for the " + - describe(Obj, *PltSymTable) + ": " + + describe(*Obj, *PltSymTable) + ": " + 
toString(StrTabOrErr.takeError())); return Error::success(); @@ -3431,11 +3420,11 @@ template void ELFDumper::printStackMap() const { } template void ELFDumper::printGroupSections() { - ELFDumperStyle->printGroupSections(ObjF->getELFFile()); + ELFDumperStyle->printGroupSections(); } template void ELFDumper::printAddrsig() { - ELFDumperStyle->printAddrsig(ObjF->getELFFile()); + ELFDumperStyle->printAddrsig(); } static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, @@ -3448,34 +3437,34 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, } template -static std::string getSectionHeadersNumString(const ELFFile *Obj, +static std::string getSectionHeadersNumString(const ELFFile &Obj, StringRef FileName) { - const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); + const typename ELFT::Ehdr *ElfHeader = Obj.getHeader(); if (ElfHeader->e_shnum != 0) return to_string(ElfHeader->e_shnum); - ArrayRef Arr = cantFail(Obj->sections()); + ArrayRef Arr = cantFail(Obj.sections()); if (Arr.empty()) return "0"; return "0 (" + to_string(Arr[0].sh_size) + ")"; } template -static std::string getSectionHeaderTableIndexString(const ELFFile *Obj, +static std::string getSectionHeaderTableIndexString(const ELFFile &Obj, StringRef FileName) { - const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); + const typename ELFT::Ehdr *ElfHeader = Obj.getHeader(); if (ElfHeader->e_shstrndx != SHN_XINDEX) return to_string(ElfHeader->e_shstrndx); - ArrayRef Arr = cantFail(Obj->sections()); + ArrayRef Arr = cantFail(Obj.sections()); if (Arr.empty()) return "65535 (corrupt: out of range)"; return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + ")"; } -template void GNUStyle::printFileHeaders(const ELFO *Obj) { - const Elf_Ehdr *e = Obj->getHeader(); +template void GNUStyle::printFileHeaders() { + const Elf_Ehdr *e = this->Obj.getHeader(); OS << "ELF Header:\n"; OS << " Magic: "; std::string Str; @@ -3529,9 +3518,9 @@ template void 
GNUStyle::printFileHeaders(const ELFO *Obj) { printFields(OS, "Number of program headers:", Str); Str = to_string(e->e_shentsize) + " (bytes)"; printFields(OS, "Size of section headers:", Str); - Str = getSectionHeadersNumString(Obj, this->FileName); + Str = getSectionHeadersNumString(this->Obj, this->FileName); printFields(OS, "Number of section headers:", Str); - Str = getSectionHeaderTableIndexString(Obj, this->FileName); + Str = getSectionHeaderTableIndexString(this->Obj, this->FileName); printFields(OS, "Section header string table index:", Str); } @@ -3553,7 +3542,7 @@ struct GroupSection { }; template -std::vector getGroups(const ELFFile *Obj, +std::vector getGroups(const ELFFile &Obj, StringRef FileName) { using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; @@ -3561,21 +3550,21 @@ std::vector getGroups(const ELFFile *Obj, std::vector Ret; uint64_t I = 0; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { ++I; if (Sec.sh_type != ELF::SHT_GROUP) continue; const Elf_Shdr *Symtab = - unwrapOrError(FileName, Obj->getSection(Sec.sh_link)); + unwrapOrError(FileName, Obj.getSection(Sec.sh_link)); StringRef StrTable = - unwrapOrError(FileName, Obj->getStringTableForSymtab(*Symtab)); + unwrapOrError(FileName, Obj.getStringTableForSymtab(*Symtab)); const Elf_Sym *Sym = unwrapOrError( - FileName, Obj->template getEntry(Symtab, Sec.sh_info)); + FileName, Obj.template getEntry(Symtab, Sec.sh_info)); auto Data = unwrapOrError( - FileName, Obj->template getSectionContentsAsArray(&Sec)); + FileName, Obj.template getSectionContentsAsArray(&Sec)); - StringRef Name = unwrapOrError(FileName, Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(FileName, Obj.getSectionName(&Sec)); StringRef Signature = StrTable.data() + Sym->st_name; Ret.push_back({Name, maybeDemangle(Signature), @@ -3588,8 +3577,8 @@ std::vector getGroups(const ELFFile *Obj, std::vector &GM = Ret.back().Members; for 
(uint32_t Ndx : Data.slice(1)) { - auto Sec = unwrapOrError(FileName, Obj->getSection(Ndx)); - const StringRef Name = unwrapOrError(FileName, Obj->getSectionName(Sec)); + auto Sec = unwrapOrError(FileName, Obj.getSection(Ndx)); + const StringRef Name = unwrapOrError(FileName, Obj.getSectionName(Sec)); GM.push_back({Name, Ndx}); } } @@ -3607,8 +3596,8 @@ mapSectionsToGroups(ArrayRef Groups) { } // namespace -template void GNUStyle::printGroupSections(const ELFO *Obj) { - std::vector V = getGroups(Obj, this->FileName); +template void GNUStyle::printGroupSections() { + std::vector V = getGroups(this->Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { OS << "\n" @@ -3634,75 +3623,49 @@ template void GNUStyle::printGroupSections(const ELFO *Obj) { } template -void GNUStyle::printRelReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rel &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, SymTab, R, RelIndex); -} - -template -void GNUStyle::printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, SymTab, R, RelIndex); -} - -template void GNUStyle::printRelrReloc(const Elf_Relr &R) { - OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 
16 : 8)) << "\n"; -} - -template -template -void GNUStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const RelTy &R, - unsigned RelIndex) { - Expected> Target = - this->dumper()->getRelocationTarget(SymTab, R); +void GNUStyle::printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { + Expected> Target = + this->dumper()->getRelocationTarget(R, SymTab); if (!Target) this->reportUniqueWarning(createError( - "unable to print relocation " + Twine(RelIndex) + " in section " + - Twine(SecIndex) + ": " + toString(Target.takeError()))); + "unable to print relocation " + Twine(RelIndex) + " in " + + describe(this->Obj, Sec) + ": " + toString(Target.takeError()))); else - printRelRelaReloc(Obj, /*Sym=*/Target->first, /*Name=*/Target->second, R); -} - -template -static Optional getAddend(const typename ELFT::Rela &R) { - return (int64_t)R.r_addend; + printRelRelaReloc(R, *Target); } -template -static Optional getAddend(const typename ELFT::Rel &) { - return None; +template void GNUStyle::printRelrReloc(const Elf_Relr &R) { + OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n"; } template -template -void GNUStyle::printRelRelaReloc(const ELFO *Obj, const Elf_Sym *Sym, - StringRef SymbolName, const RelTy &R) { +void GNUStyle::printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym) { // First two fields are bit width dependent. The rest of them are fixed width. unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias}; unsigned Width = ELFT::Is64Bits ? 
16 : 8; - Fields[0].Str = to_string(format_hex_no_prefix(R.r_offset, Width)); - Fields[1].Str = to_string(format_hex_no_prefix(R.r_info, Width)); + Fields[0].Str = to_string(format_hex_no_prefix(R.Offset, Width)); + Fields[1].Str = to_string(format_hex_no_prefix(R.Info, Width)); SmallString<32> RelocName; - Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName); + this->Obj.getRelocationTypeName(R.Type, RelocName); Fields[2].Str = RelocName.c_str(); - if (Sym) - Fields[3].Str = to_string(format_hex_no_prefix(Sym->getValue(), Width)); + if (RelSym.Sym) + Fields[3].Str = + to_string(format_hex_no_prefix(RelSym.Sym->getValue(), Width)); - Fields[4].Str = std::string(SymbolName); + Fields[4].Str = std::string(RelSym.Name); for (const Field &F : Fields) printField(F); std::string Addend; - if (Optional A = getAddend(R)) { + if (Optional A = R.Addend) { int64_t RelAddend = *A; - if (!SymbolName.empty()) { + if (!RelSym.Name.empty()) { if (RelAddend < 0) { Addend = " - "; RelAddend = std::abs(RelAddend); @@ -3715,7 +3678,8 @@ void GNUStyle::printRelRelaReloc(const ELFO *Obj, const Elf_Sym *Sym, OS << Addend << "\n"; } -template void GNUStyle::printRelocHeader(unsigned SType) { +template +static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType) { bool IsRela = SType == ELF::SHT_RELA || SType == ELF::SHT_ANDROID_RELA; bool IsRelr = SType == ELF::SHT_RELR || SType == ELF::SHT_ANDROID_RELR; if (ELFT::Is64Bits) @@ -3736,6 +3700,15 @@ template void GNUStyle::printRelocHeader(unsigned SType) { OS << "\n"; } +template +void GNUStyle::printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg) { + uint64_t Offset = Reg.Addr - this->Obj.base(); + OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x" + << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n"; + printRelocHeaderFields(OS, Type); +} + template static bool isRelocationSec(const typename ELFT::Shdr &Sec) { return Sec.sh_type == 
ELF::SHT_REL || Sec.sh_type == ELF::SHT_RELA || @@ -3744,14 +3717,14 @@ static bool isRelocationSec(const typename ELFT::Shdr &Sec) { Sec.sh_type == ELF::SHT_ANDROID_RELR; } -template void GNUStyle::printRelocations(const ELFO *Obj) { +template void GNUStyle::printRelocations() { auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected { // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. if (Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA) { Expected> RelasOrErr = - Obj->android_relas(&Sec); + this->Obj.android_relas(&Sec); if (!RelasOrErr) return RelasOrErr.takeError(); return RelasOrErr->size(); @@ -3759,17 +3732,17 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_RELR)) { - Expected RelrsOrErr = Obj->relrs(&Sec); + Expected RelrsOrErr = this->Obj.relrs(&Sec); if (!RelrsOrErr) return RelrsOrErr.takeError(); - return Obj->decode_relrs(*RelrsOrErr).size(); + return this->Obj.decode_relrs(*RelrsOrErr).size(); } return Sec.getEntityCount(); }; bool HasRelocSections = false; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (!isRelocationSec(Sec)) continue; HasRelocSections = true; @@ -3779,16 +3752,16 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { EntriesNum = std::to_string(*NumOrErr); else this->reportUniqueWarning(createError( - "unable to get the number of relocations in " + describe(Obj, Sec) + - ": " + toString(NumOrErr.takeError()))); + "unable to get the number of relocations in " + + describe(this->Obj, Sec) + ": " + toString(NumOrErr.takeError()))); uintX_t Offset = Sec.sh_offset; - StringRef Name = this->getPrintableSectionName(Obj, Sec); + StringRef Name = this->getPrintableSectionName(Sec); OS << "\nRelocation section '" << Name << "' at offset 0x" << to_hexString(Offset, false) << " 
contains " << EntriesNum << " entries:\n"; - printRelocHeader(Sec.sh_type); - this->printRelocationsHelper(Obj, Sec); + printRelocHeaderFields(OS, Sec.sh_type); + this->printRelocationsHelper(Sec); } if (!HasRelocSections) OS << "\nThere are no relocations in this file.\n"; @@ -3846,13 +3819,12 @@ static void printSectionDescription(formatted_raw_ostream &OS, OS << "p (processor specific)\n"; } -template -void GNUStyle::printSectionHeaders(const ELFO *Obj) { +template void GNUStyle::printSectionHeaders() { unsigned Bias = ELFT::Is64Bits ? 0 : 8; - ArrayRef Sections = cantFail(Obj->sections()); + ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " - << "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n"; + << "0x" << to_hexString(this->Obj.getHeader()->e_shoff, false) << ":\n\n"; OS << "Section Headers:\n"; Field Fields[11] = { {"[Nr]", 2}, {"Name", 7}, {"Type", 25}, @@ -3864,8 +3836,8 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { OS << "\n"; StringRef SecStrTable; - if (Expected SecStrTableOrErr = - Obj->getSectionStringTable(Sections, this->dumper()->WarningHandler)) + if (Expected SecStrTableOrErr = this->Obj.getSectionStringTable( + Sections, this->dumper()->WarningHandler)) SecStrTable = *SecStrTableOrErr; else this->reportUniqueWarning(SecStrTableOrErr.takeError()); @@ -3877,15 +3849,15 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { Fields[1].Str = ""; else Fields[1].Str = std::string(unwrapOrError( - this->FileName, Obj->getSectionName(&Sec, SecStrTable))); + this->FileName, this->Obj.getSectionName(&Sec, SecStrTable))); Fields[2].Str = - getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type); + getSectionTypeString(this->Obj.getHeader()->e_machine, Sec.sh_type); Fields[3].Str = to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 
16 : 8)); Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6)); Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6)); Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2)); - Fields[7].Str = getGNUFlags(Obj->getHeader()->e_machine, Sec.sh_flags); + Fields[7].Str = getGNUFlags(this->Obj.getHeader()->e_machine, Sec.sh_flags); Fields[8].Str = to_string(Sec.sh_link); Fields[9].Str = to_string(Sec.sh_info); Fields[10].Str = to_string(Sec.sh_addralign); @@ -3905,16 +3877,15 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { OS << "\n"; ++SectionIndex; } - printSectionDescription(OS, Obj->getHeader()->e_machine); + printSectionDescription(OS, this->Obj.getHeader()->e_machine); } template -void GNUStyle::printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, - size_t Entries, +void GNUStyle::printSymtabMessage(const Elf_Shdr *Symtab, size_t Entries, bool NonVisibilityBitsUsed) { StringRef Name; if (Symtab) - Name = this->getPrintableSectionName(Obj, *Symtab); + Name = this->getPrintableSectionName(*Symtab); if (!Name.empty()) OS << "\nSymbol table '" << Name << "'"; else @@ -3932,8 +3903,7 @@ void GNUStyle::printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, } template -std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, - const Elf_Sym *Symbol, +std::string GNUStyle::getSymbolSectionNdx(const Elf_Sym *Symbol, const Elf_Sym *FirstSym) { unsigned SectionIndex = Symbol->st_shndx; switch (SectionIndex) { @@ -3976,8 +3946,7 @@ std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, } template -void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, - const Elf_Sym *FirstSym, +void GNUStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; @@ -3989,7 +3958,7 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, Fields[2].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); - if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[3].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -4004,7 +3973,7 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, " [st_other, 2)) + ">]"; Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0; - Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym); + Fields[6].Str = getSymbolSectionNdx(Symbol, FirstSym); Fields[7].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic); @@ -4014,9 +3983,8 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, } template -void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, - uint32_t Sym, StringRef StrTable, - uint32_t Bucket) { +void GNUStyle::printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, + StringRef StrTable, uint32_t Bucket) { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias, 34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias}; @@ -4029,7 +3997,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); - if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[4].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -4039,7 +4007,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); Fields[6].Str = printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities)); - Fields[7].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym); + Fields[7].Str = getSymbolSectionNdx(Symbol, FirstSym); Fields[8].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, true); for (auto &Entry : Fields) @@ -4048,8 +4016,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, } template -void GNUStyle::printSymbols(const ELFO *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) { +void GNUStyle::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { if (!PrintSymbols && !PrintDynamicSymbols) return; // GNU readelf prints both the .dynsym and .symtab with --symbols. 
@@ -4059,8 +4026,7 @@ void GNUStyle::printSymbols(const ELFO *Obj, bool PrintSymbols, } template -void GNUStyle::printHashTableSymbols(const ELFO *Obj, - const Elf_Hash &SysVHash) { +void GNUStyle::printHashTableSymbols(const Elf_Hash &SysVHash) { StringRef StringTable = this->dumper()->getDynamicStringTable(); if (StringTable.empty()) return; @@ -4100,15 +4066,14 @@ void GNUStyle::printHashTableSymbols(const ELFO *Obj, break; } - printHashedSymbol(Obj, FirstSym, Ch, StringTable, Buc); + printHashedSymbol(FirstSym, Ch, StringTable, Buc); Visited[Ch] = true; } } } template -void GNUStyle::printGnuHashTableSymbols(const ELFO *Obj, - const Elf_GnuHash &GnuHash) { +void GNUStyle::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { StringRef StringTable = this->dumper()->getDynamicStringTable(); if (StringTable.empty()) return; @@ -4132,7 +4097,7 @@ void GNUStyle::printGnuHashTableSymbols(const ELFO *Obj, uint32_t GnuHashable = Index - GnuHash.symndx; // Print whole chain while (true) { - printHashedSymbol(Obj, FirstSym, Index++, StringTable, Buc); + printHashedSymbol(FirstSym, Index++, StringTable, Buc); // Chain ends at symbol with stopper bit if ((GnuHash.values(DynSyms.size())[GnuHashable++] & 1) == 1) break; @@ -4140,13 +4105,13 @@ void GNUStyle::printGnuHashTableSymbols(const ELFO *Obj, } } -template void GNUStyle::printHashSymbols(const ELFO *Obj) { +template void GNUStyle::printHashSymbols() { if (const Elf_Hash *SysVHash = this->dumper()->getHashTable()) { OS << "\n Symbol table of .hash for image:\n"; - if (Error E = checkHashTable(Obj, SysVHash)) + if (Error E = checkHashTable(this->Obj, SysVHash)) this->reportUniqueWarning(std::move(E)); else - printHashTableSymbols(Obj, *SysVHash); + printHashTableSymbols(*SysVHash); } // Try printing the .gnu.hash table. 
@@ -4158,10 +4123,10 @@ template void GNUStyle::printHashSymbols(const ELFO *Obj) { OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; OS << "\n"; - if (Error E = checkGNUHashTable(Obj, GnuHash)) + if (Error E = checkGNUHashTable(this->Obj, GnuHash)) this->reportUniqueWarning(std::move(E)); else - printGnuHashTableSymbols(Obj, *GnuHash); + printGnuHashTableSymbols(*GnuHash); } } @@ -4247,21 +4212,19 @@ static bool checkPTDynamic(const typename ELFT::Phdr &Phdr, template void GNUStyle::printProgramHeaders( - const ELFO *Obj, bool PrintProgramHeaders, - cl::boolOrDefault PrintSectionMapping) { + bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) - printProgramHeaders(Obj); + printProgramHeaders(); // Display the section mapping along with the program headers, unless // -section-mapping is explicitly set to false. if (PrintSectionMapping != cl::BOU_FALSE) - printSectionMapping(Obj); + printSectionMapping(); } -template -void GNUStyle::printProgramHeaders(const ELFO *Obj) { +template void GNUStyle::printProgramHeaders() { unsigned Bias = ELFT::Is64Bits ? 8 : 0; - const Elf_Ehdr *Header = Obj->getHeader(); + const Elf_Ehdr *Header = this->Obj.getHeader(); Field Fields[8] = {2, 17, 26, 37 + Bias, 48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias}; OS << "\nElf file type is " @@ -4280,7 +4243,7 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { unsigned Width = ELFT::Is64Bits ? 18 : 10; unsigned SizeWidth = ELFT::Is64Bits ? 
8 : 7; - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError("unable to dump program headers: " + toString(PhdrsOrErr.takeError()))); @@ -4307,15 +4270,15 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { this->FileName); }; - if (Phdr.p_offset >= Obj->getBufSize()) { + if (Phdr.p_offset >= this->Obj.getBufSize()) { ReportBadInterp("it goes past the end of the file (0x" + - Twine::utohexstr(Obj->getBufSize()) + ")"); + Twine::utohexstr(this->Obj.getBufSize()) + ")"); continue; } const char *Data = - reinterpret_cast(Obj->base()) + Phdr.p_offset; - size_t MaxSize = Obj->getBufSize() - Phdr.p_offset; + reinterpret_cast(this->Obj.base()) + Phdr.p_offset; + size_t MaxSize = this->Obj.getBufSize() - Phdr.p_offset; size_t Len = strnlen(Data, MaxSize); if (Len == MaxSize) { ReportBadInterp("it is not null-terminated"); @@ -4329,13 +4292,12 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { } } -template -void GNUStyle::printSectionMapping(const ELFO *Obj) { +template void GNUStyle::printSectionMapping() { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; DenseSet BelongsToSegment; int Phnum = 0; - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError( "can't read program headers to build section to segment mapping: " + @@ -4347,7 +4309,7 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { std::string Sections; OS << format(" %2.2d ", Phnum++); // Check if each section is in a segment and then print mapping. 
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (Sec.sh_type == ELF::SHT_NULL) continue; @@ -4357,7 +4319,8 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { if (checkTLSSections(Phdr, Sec) && checkOffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && checkPTDynamic(Phdr, Sec)) { Sections += - unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + + unwrapOrError(this->FileName, this->Obj.getSectionName(&Sec)) + .str() + " "; BelongsToSegment.insert(&Sec); } @@ -4368,10 +4331,11 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { // Display sections that do not belong to a segment. std::string Sections; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) Sections += - unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + ' '; + unwrapOrError(this->FileName, this->Obj.getSectionName(&Sec)).str() + + ' '; } if (!Sections.empty()) { OS << " None " << Sections << '\n'; @@ -4380,21 +4344,16 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { } namespace { -template struct RelSymbol { - const typename ELFT::Sym *Sym; - std::string Name; -}; -template -RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, +template +RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, const ELFDumper *Dumper, - const RelTy &Reloc) { - uint32_t SymIndex = Reloc.getSymbol(Obj->isMips64EL()); + const Relocation &Reloc) { auto WarnAndReturn = [&](const typename ELFT::Sym *Sym, const Twine &Reason) -> RelSymbol { reportWarning( createError("unable to get name of the dynamic symbol with index " + - Twine(SymIndex) + ": " + Reason), + Twine(Reloc.Symbol) + ": " + Reason), FileName); return {Sym, ""}; }; @@ -4407,13 +4366,13 @@ RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, // We might have an object without a section 
header. In this case the size of // Symbols is zero, because there is no way to know the size of the dynamic // table. We should allow this case and not print a warning. - if (!Symbols.empty() && SymIndex >= Symbols.size()) + if (!Symbols.empty() && Reloc.Symbol >= Symbols.size()) return WarnAndReturn( nullptr, "index is greater than or equal to the number of dynamic symbols (" + Twine(Symbols.size()) + ")"); - const typename ELFT::Sym *Sym = FirstSym + SymIndex; + const typename ELFT::Sym *Sym = FirstSym + Reloc.Symbol; Expected ErrOrName = Sym->getName(Dumper->getDynamicStringTable()); if (!ErrOrName) return WarnAndReturn(Sym, toString(ErrOrName.takeError())); @@ -4423,22 +4382,21 @@ RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, } // namespace template -template -void GNUStyle::printDynamicRelocation(const ELFO *Obj, const RelTy &R) { - RelSymbol S = getSymbolForReloc(Obj, this->FileName, this->dumper(), R); - printRelRelaReloc(Obj, S.Sym, S.Name, R); +void GNUStyle::printDynamicReloc(const Relocation &R) { + printRelRelaReloc( + R, getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R)); } template -static size_t getMaxDynamicTagSize(const ELFFile *Obj, +static size_t getMaxDynamicTagSize(const ELFFile &Obj, typename ELFT::DynRange Tags) { size_t Max = 0; for (const typename ELFT::Dyn &Dyn : Tags) - Max = std::max(Max, Obj->getDynamicTagAsString(Dyn.d_tag).size()); + Max = std::max(Max, Obj.getDynamicTagAsString(Dyn.d_tag).size()); return Max; } -template void GNUStyle::printDynamic(const ELFO *Obj) { +template void GNUStyle::printDynamic() { Elf_Dyn_Range Table = this->dumper()->dynamic_table(); if (Table.empty()) return; @@ -4448,12 +4406,12 @@ template void GNUStyle::printDynamic(const ELFO *Obj) { OS << "Dynamic section at offset " << format_hex(reinterpret_cast(DynamicTableRegion.Addr) - - Obj->base(), + this->Obj.base(), 1) << " contains " << Table.size() << " entries:\n"; // The type name is surrounded with round brackets, 
hence add 2. - size_t MaxTagSize = getMaxDynamicTagSize(Obj, Table) + 2; + size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table) + 2; // The "Name/Value" column should be indented from the "Type" column by N // spaces, where N = MaxTagSize - length of "Type" (4) + trailing // space (1) = 3. @@ -4464,86 +4422,72 @@ template void GNUStyle::printDynamic(const ELFO *Obj) { for (auto Entry : Table) { uintX_t Tag = Entry.getTag(); std::string Type = - std::string("(") + Obj->getDynamicTagAsString(Tag).c_str() + ")"; + std::string("(") + this->Obj.getDynamicTagAsString(Tag).c_str() + ")"; std::string Value = this->dumper()->getDynamicEntry(Tag, Entry.getVal()); OS << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10) << format(ValueFmt.c_str(), Type.c_str()) << Value << "\n"; } } -template -void GNUStyle::printDynamicRelocations(const ELFO *Obj) { - const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); +template void GNUStyle::printDynamicRelocations() { + this->printDynamicRelocationsHelper(); +} + +template void DumpStyle::printDynamicRelocationsHelper() { + const bool IsMips64EL = this->Obj.isMips64EL(); const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); - const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); - const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); if (DynRelaRegion.Size > 0) { - OS << "\n'RELA' relocation section at offset " - << format_hex(reinterpret_cast(DynRelaRegion.Addr) - - Obj->base(), - 1) - << " contains " << DynRelaRegion.Size << " bytes:\n"; - printRelocHeader(ELF::SHT_RELA); + printDynamicRelocHeader(ELF::SHT_RELA, "RELA", DynRelaRegion); for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Obj, Rela); + printDynamicReloc(Relocation(Rela, IsMips64EL)); } + + const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); if (DynRelRegion.Size > 0) { - OS << "\n'REL' relocation section at offset " - << 
format_hex(reinterpret_cast(DynRelRegion.Addr) - - Obj->base(), - 1) - << " contains " << DynRelRegion.Size << " bytes:\n"; - printRelocHeader(ELF::SHT_REL); + printDynamicRelocHeader(ELF::SHT_REL, "REL", DynRelRegion); for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Obj, Rel); + printDynamicReloc(Relocation(Rel, IsMips64EL)); } + + const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); if (DynRelrRegion.Size > 0) { - OS << "\n'RELR' relocation section at offset " - << format_hex(reinterpret_cast(DynRelrRegion.Addr) - - Obj->base(), - 1) - << " contains " << DynRelrRegion.Size << " bytes:\n"; - printRelocHeader(ELF::SHT_REL); + printDynamicRelocHeader(ELF::SHT_REL, "RELR", DynRelrRegion); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &R : Obj->decode_relrs(Relrs)) - printDynamicRelocation(Obj, R); + for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs)) + printDynamicReloc(Relocation(Rel, IsMips64EL)); } - if (DynPLTRelRegion.Size) { - OS << "\n'PLT' relocation section at offset " - << format_hex(reinterpret_cast(DynPLTRelRegion.Addr) - - Obj->base(), - 1) - << " contains " << DynPLTRelRegion.Size << " bytes:\n"; + const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); + if (DynPLTRelRegion.Size) { if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { - printRelocHeader(ELF::SHT_RELA); + printDynamicRelocHeader(ELF::SHT_RELA, "PLT", DynPLTRelRegion); for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rela); + printDynamicReloc(Relocation(Rela, IsMips64EL)); } else { - printRelocHeader(ELF::SHT_REL); + printDynamicRelocHeader(ELF::SHT_REL, "PLT", DynPLTRelRegion); for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rel); + printDynamicReloc(Relocation(Rel, IsMips64EL)); } } } template void GNUStyle::printGNUVersionSectionProlog( - const ELFFile *Obj, const typename ELFT::Shdr *Sec, - const Twine 
&Label, unsigned EntriesNum) { - StringRef SecName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); + const typename ELFT::Shdr *Sec, const Twine &Label, unsigned EntriesNum) { + StringRef SecName = + unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)); OS << Label << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; StringRef SymTabName = ""; Expected SymTabOrErr = - Obj->getSection(Sec->sh_link); + this->Obj.getSection(Sec->sh_link); if (SymTabOrErr) SymTabName = - unwrapOrError(this->FileName, Obj->getSectionName(*SymTabOrErr)); + unwrapOrError(this->FileName, this->Obj.getSectionName(*SymTabOrErr)); else this->reportUniqueWarning(createError("invalid section linked to " + - describe(Obj, *Sec) + ": " + + describe(this->Obj, *Sec) + ": " + toString(SymTabOrErr.takeError()))); OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) @@ -4552,16 +4496,15 @@ void GNUStyle::printGNUVersionSectionProlog( } template -void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void GNUStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { if (!Sec) return; - printGNUVersionSectionProlog(Obj, Sec, "Version symbols", + printGNUVersionSectionProlog(Sec, "Version symbols", Sec->sh_size / sizeof(Elf_Versym)); Expected> VerTableOrErr = this->dumper()->getVersionTable(Sec, /*SymTab=*/nullptr, - /*StrTab=*/nullptr); + /*StrTab=*/nullptr); if (!VerTableOrErr) { this->reportUniqueWarning(VerTableOrErr.takeError()); return; @@ -4581,9 +4524,10 @@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, this->dumper()->getSymbolVersionByIndex(Ndx, IsDefault); if (!NameOrErr) { if (!NameOrErr) - this->reportUniqueWarning(createError( - "unable to get a version for entry " + Twine(I) + " of " + - describe(Obj, *Sec) + ": " + toString(NameOrErr.takeError()))); + this->reportUniqueWarning( + createError("unable to get a version for entry " + Twine(I) + + " of " + describe(this->Obj, *Sec) + ": " + + 
toString(NameOrErr.takeError()))); Versions.emplace_back(""); continue; } @@ -4627,12 +4571,11 @@ static std::string versionFlagToString(unsigned Flags) { } template -void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void GNUStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { if (!Sec) return; - printGNUVersionSectionProlog(Obj, Sec, "Version definition", Sec->sh_info); + printGNUVersionSectionProlog(Sec, "Version definition", Sec->sh_info); Expected> V = this->dumper()->getVersionDefinitions(Sec); if (!V) { @@ -4655,13 +4598,12 @@ void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, } template -void GNUStyle::printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void GNUStyle::printVersionDependencySection(const Elf_Shdr *Sec) { if (!Sec) return; unsigned VerneedNum = Sec->sh_info; - printGNUVersionSectionProlog(Obj, Sec, "Version needs", VerneedNum); + printGNUVersionSectionProlog(Sec, "Version needs", VerneedNum); Expected> V = this->dumper()->getVersionDependencies(Sec); @@ -4796,11 +4738,10 @@ void GNUStyle::printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) { // dynamic symbol table. The table shows the number of hash buckets for // different lengths of chains as an absolute number and percentage of the total // buckets, and the cumulative coverage of symbols for each set of buckets. -template -void GNUStyle::printHashHistograms(const ELFFile *Obj) { +template void GNUStyle::printHashHistograms() { // Print histogram for the .hash section. if (const Elf_Hash *HashTable = this->dumper()->getHashTable()) { - if (Error E = checkHashTable(Obj, HashTable)) + if (Error E = checkHashTable(this->Obj, HashTable)) this->reportUniqueWarning(std::move(E)); else printHashHistogram(*HashTable); @@ -4808,20 +4749,18 @@ void GNUStyle::printHashHistograms(const ELFFile *Obj) { // Print histogram for the .gnu.hash section. 
if (const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable()) { - if (Error E = checkGNUHashTable(Obj, GnuHashTable)) + if (Error E = checkGNUHashTable(this->Obj, GnuHashTable)) this->reportUniqueWarning(std::move(E)); else printGnuHashHistogram(*GnuHashTable); } } -template -void GNUStyle::printCGProfile(const ELFFile *Obj) { +template void GNUStyle::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } -template -void GNUStyle::printAddrsig(const ELFFile *Obj) { +template void GNUStyle::printAddrsig() { reportError(createError("--addrsig: not implemented"), this->FileName); } @@ -5352,8 +5291,7 @@ static void printCoreNote(raw_ostream &OS, const CoreNote &Note) { } } -template -void GNUStyle::printNotes(const ELFFile *Obj) { +template void GNUStyle::printNotes() { auto PrintHeader = [&](Optional SecName, const typename ELFT::Off Offset, const typename ELFT::Addr Size) { @@ -5385,7 +5323,7 @@ void GNUStyle::printNotes(const ELFFile *Obj) { } else if (Name == "AMDGPU") { OS << getAMDGPUNoteTypeName(Type) << '\n'; } else { - StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + StringRef NoteType = this->Obj.getHeader()->e_type == ELF::ET_CORE ? 
getCoreNoteTypeName(Type) : getGenericNoteTypeName(Type); if (!NoteType.empty()) @@ -5425,21 +5363,21 @@ void GNUStyle::printNotes(const ELFFile *Obj) { } }; - ArrayRef Sections = cantFail(Obj->sections()); - if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + ArrayRef Sections = cantFail(this->Obj.sections()); + if (this->Obj.getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { for (const auto &S : Sections) { if (S.sh_type != SHT_NOTE) continue; - PrintHeader(expectedToOptional(Obj->getSectionName(&S)), S.sh_offset, + PrintHeader(expectedToOptional(this->Obj.getSectionName(&S)), S.sh_offset, S.sh_size); Error Err = Error::success(); - for (auto Note : Obj->notes(S, Err)) + for (auto Note : this->Obj.notes(S, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); } } else { - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError( "unable to read program headers to locate the PT_NOTE segment: " + @@ -5452,7 +5390,7 @@ void GNUStyle::printNotes(const ELFFile *Obj) { continue; PrintHeader(/*SecName=*/None, P.p_offset, P.p_filesz); Error Err = Error::success(); - for (auto Note : Obj->notes(P, Err)) + for (auto Note : this->Obj.notes(P, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); @@ -5460,14 +5398,12 @@ void GNUStyle::printNotes(const ELFFile *Obj) { } } -template -void GNUStyle::printELFLinkerOptions(const ELFFile *Obj) { +template void GNUStyle::printELFLinkerOptions() { OS << "printELFLinkerOptions not implemented!\n"; } template void DumpStyle::printDependentLibsHelper( - const ELFFile *Obj, function_ref OnSectionStart, function_ref OnLibEntry) { auto Warn = [this](unsigned SecNdx, StringRef Msg) { @@ -5477,14 +5413,14 @@ void DumpStyle::printDependentLibsHelper( }; unsigned I = -1; - for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) { + for (const Elf_Shdr &Shdr : 
cantFail(Obj.sections())) { ++I; if (Shdr.sh_type != ELF::SHT_LLVM_DEPENDENT_LIBRARIES) continue; OnSectionStart(Shdr); - Expected> ContentsOrErr = Obj->getSectionContents(&Shdr); + Expected> ContentsOrErr = Obj.getSectionContents(&Shdr); if (!ContentsOrErr) { Warn(I, toString(ContentsOrErr.takeError())); continue; @@ -5505,8 +5441,7 @@ void DumpStyle::printDependentLibsHelper( } template -void DumpStyle::printRelocationsHelper(const ELFFile *Obj, - const Elf_Shdr &Sec) { +void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { auto Warn = [&](Error &&E, const Twine &Prefix = "unable to read relocations from") { this->reportUniqueWarning(createError(Prefix + " " + describe(Obj, Sec) + @@ -5518,7 +5453,7 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, // a symbol table. const Elf_Shdr *SymTab; if (Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_RELR) { - Expected SymTabOrErr = Obj->getSection(Sec.sh_link); + Expected SymTabOrErr = Obj.getSection(Sec.sh_link); if (!SymTabOrErr) { Warn(SymTabOrErr.takeError(), "unable to locate a symbol table for"); return; @@ -5526,28 +5461,28 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, SymTab = *SymTabOrErr; } - unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); unsigned RelNdx = 0; + const bool IsMips64EL = this->Obj.isMips64EL(); switch (Sec.sh_type) { case ELF::SHT_REL: - if (Expected RangeOrErr = Obj->rels(&Sec)) { + if (Expected RangeOrErr = Obj.rels(&Sec)) { for (const Elf_Rel &R : *RangeOrErr) - printRelReloc(Obj, SecNdx, SymTab, R, ++RelNdx); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } break; case ELF::SHT_RELA: - if (Expected RangeOrErr = Obj->relas(&Sec)) { + if (Expected RangeOrErr = Obj.relas(&Sec)) { for (const Elf_Rela &R : *RangeOrErr) - printRelaReloc(Obj, SecNdx, SymTab, R, ++RelNdx); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } 
break; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: { - Expected RangeOrErr = Obj->relrs(&Sec); + Expected RangeOrErr = Obj.relrs(&Sec); if (!RangeOrErr) { Warn(RangeOrErr.takeError()); break; @@ -5558,15 +5493,16 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, break; } - for (const Elf_Rel &R : Obj->decode_relrs(*RangeOrErr)) - printRelReloc(Obj, SecNdx, /*SymTab=*/nullptr, R, ++RelNdx); + for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr)) + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, + /*SymTab=*/nullptr); break; } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: - if (Expected> RelasOrErr = Obj->android_relas(&Sec)) { + if (Expected> RelasOrErr = Obj.android_relas(&Sec)) { for (const Elf_Rela &R : *RelasOrErr) - printRelaReloc(Obj, SecNdx, SymTab, R, ++RelNdx); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { Warn(RelasOrErr.takeError()); } @@ -5575,11 +5511,10 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, } template -StringRef DumpStyle::getPrintableSectionName(const ELFFile *Obj, - const Elf_Shdr &Sec) const { +StringRef DumpStyle::getPrintableSectionName(const Elf_Shdr &Sec) const { StringRef Name = ""; if (Expected SecNameOrErr = - Obj->getSectionName(&Sec, this->dumper()->WarningHandler)) + Obj.getSectionName(&Sec, this->dumper()->WarningHandler)) Name = *SecNameOrErr; else this->reportUniqueWarning(createError("unable to get the name of " + @@ -5588,8 +5523,7 @@ StringRef DumpStyle::getPrintableSectionName(const ELFFile *Obj, return Name; } -template -void GNUStyle::printDependentLibs(const ELFFile *Obj) { +template void GNUStyle::printDependentLibs() { bool SectionStarted = false; struct NameOffset { StringRef Name; @@ -5613,13 +5547,13 @@ void GNUStyle::printDependentLibs(const ELFFile *Obj) { PrintSection(); SectionStarted = true; Current.Offset = Shdr.sh_offset; - Current.Name = this->getPrintableSectionName(Obj, Shdr); + Current.Name = this->getPrintableSectionName(Shdr); }; 
auto OnLibEntry = [&](StringRef Lib, uint64_t Offset) { SecEntries.push_back(NameOffset{Lib, Offset}); }; - this->printDependentLibsHelper(Obj, OnSectionStart, OnLibEntry); + this->printDependentLibsHelper(OnSectionStart, OnLibEntry); if (SectionStarted) PrintSection(); } @@ -5648,7 +5582,7 @@ template void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue, Optional FunctionSec, - const StringRef SectionName, + const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) { // This function ignores potentially erroneous input, unless it is directly @@ -5693,8 +5627,8 @@ void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, if (*Offset == PrevOffset) { reportWarning( createStringError(object_error::parse_failed, - "could not extract a valid stack size in section %s", - SectionName.data()), + "could not extract a valid stack size in " + + describe(*Obj->getELFFile(), StackSizeSec)), Obj->getFileName()); return; } @@ -5714,7 +5648,7 @@ template void DumpStyle::printStackSize(const ELFObjectFile *Obj, RelocationRef Reloc, SectionRef FunctionSec, - const StringRef &StackSizeSectionName, + const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data) { // This function ignores potentially erroneous input, unless it is directly @@ -5755,15 +5689,15 @@ void DumpStyle::printStackSize(const ELFObjectFile *Obj, reportUniqueWarning(createStringError( object_error::parse_failed, "found invalid relocation offset (0x" + Twine::utohexstr(Offset) + - ") into section " + StackSizeSectionName + + ") into " + describe(*Obj->getELFFile(), StackSizeSec) + " while trying to extract a stack size entry")); return; } uint64_t Addend = Data.getAddress(&Offset); uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend); - this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSectionName, - Data, &Offset); + this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSec, Data, + &Offset); } template @@ 
-5773,8 +5707,7 @@ void DumpStyle::printNonRelocatableStackSizes( // related to stack size reporting. const ELFFile *EF = Obj->getELFFile(); for (const SectionRef &Sec : Obj->sections()) { - StringRef SectionName = getSectionName(Sec); - if (SectionName != ".stack_sizes") + if (getSectionName(Sec) != ".stack_sizes") continue; PrintHeader(); const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); @@ -5788,13 +5721,13 @@ void DumpStyle::printNonRelocatableStackSizes( if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning(createStringError( object_error::parse_failed, - describe(EF, *ElfSec) + + describe(*EF, *ElfSec) + " ended while trying to extract a stack size entry")); break; } uint64_t SymValue = Data.getAddress(&Offset); - printFunctionStackSize(Obj, SymValue, /*FunctionSec=*/None, SectionName, - Data, &Offset); + printFunctionStackSize(Obj, SymValue, /*FunctionSec=*/None, *ElfSec, Data, + &Offset); } } } @@ -5835,7 +5768,7 @@ void DumpStyle::printRelocatableStackSizes( if (!RelSecOrErr) { reportUniqueWarning( createStringError(object_error::parse_failed, - describe(Obj->getELFFile(), *ElfSec) + + describe(*Obj->getELFFile(), *ElfSec) + ": failed to get a relocated section: " + toString(RelSecOrErr.takeError()))); continue; @@ -5859,22 +5792,23 @@ void DumpStyle::printRelocatableStackSizes( PrintHeader(); const SectionRef &StackSizesSec = StackSizeMapEntry.first; const SectionRef &RelocSec = StackSizeMapEntry.second; + const Elf_Shdr *StackSizesELFSec = + Obj->getSection(StackSizesSec.getRawDataRefImpl()); // Warn about stack size sections without a relocation section. 
- StringRef StackSizeSectionName = getSectionName(StackSizesSec); if (RelocSec == NullSection) { - reportWarning(createError("section " + StackSizeSectionName + - " does not have a corresponding " - "relocation section"), - Obj->getFileName()); + reportWarning( + createError(".stack_sizes (" + + describe(*Obj->getELFFile(), *StackSizesELFSec) + + ") does not have a corresponding " + "relocation section"), + Obj->getFileName()); continue; } // A .stack_sizes section header's sh_link field is supposed to point // to the section that contains the functions whose stack sizes are // described in it. - const Elf_Shdr *StackSizesELFSec = - Obj->getSection(StackSizesSec.getRawDataRefImpl()); const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError( this->FileName, EF->getSection(StackSizesELFSec->sh_link))); @@ -5891,13 +5825,13 @@ void DumpStyle::printRelocatableStackSizes( Obj->getSection(RelocSec.getRawDataRefImpl()); reportUniqueWarning(createStringError( object_error::parse_failed, - describe(EF, *RelocSecShdr) + + describe(*EF, *RelocSecShdr) + " contains an unsupported relocation with index " + Twine(I) + ": " + EF->getRelocationTypeName(Reloc.getType()))); continue; } - this->printStackSize(Obj, Reloc, FunctionSec, StackSizeSectionName, - Resolver, Data); + this->printStackSize(Obj, Reloc, FunctionSec, *StackSizesELFSec, Resolver, + Data); } } } @@ -5989,8 +5923,7 @@ void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { OS.PadToColumn(40 + 3 * Bias); OS << printEnum(Sym->getType(), makeArrayRef(ElfSymbolTypes)); OS.PadToColumn(48 + 3 * Bias); - OS << getSymbolSectionNdx(Parser.Obj, Sym, - this->dumper()->dynamic_symbols().begin()); + OS << getSymbolSectionNdx(Sym, this->dumper()->dynamic_symbols().begin()); OS.PadToColumn(52 + 3 * Bias); OS << SymName << "\n"; } @@ -6039,8 +5972,7 @@ void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { OS.PadToColumn(29 + 3 * Bias); OS << printEnum(Sym->getType(), makeArrayRef(ElfSymbolTypes)); OS.PadToColumn(37 + 
3 * Bias); - OS << getSymbolSectionNdx(Parser.Obj, Sym, - this->dumper()->dynamic_symbols().begin()); + OS << getSymbolSectionNdx(Sym, this->dumper()->dynamic_symbols().begin()); OS.PadToColumn(41 + 3 * Bias); OS << SymName << "\n"; } @@ -6101,8 +6033,8 @@ void GNUStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { OS << "\n"; } -template void LLVMStyle::printFileHeaders(const ELFO *Obj) { - const Elf_Ehdr *E = Obj->getHeader(); +template void LLVMStyle::printFileHeaders() { + const Elf_Ehdr *E = this->Obj.getHeader(); { DictScope D(W, "ElfHeader"); { @@ -6155,16 +6087,15 @@ template void LLVMStyle::printFileHeaders(const ELFO *Obj) { W.printNumber("ProgramHeaderCount", E->e_phnum); W.printNumber("SectionHeaderEntrySize", E->e_shentsize); W.printString("SectionHeaderCount", - getSectionHeadersNumString(Obj, this->FileName)); + getSectionHeadersNumString(this->Obj, this->FileName)); W.printString("StringTableSectionIndex", - getSectionHeaderTableIndexString(Obj, this->FileName)); + getSectionHeaderTableIndexString(this->Obj, this->FileName)); } } -template -void LLVMStyle::printGroupSections(const ELFO *Obj) { +template void LLVMStyle::printGroupSections() { DictScope Lists(W, "Groups"); - std::vector V = getGroups(Obj, this->FileName); + std::vector V = getGroups(this->Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { DictScope D(W, "Group"); @@ -6193,90 +6124,77 @@ void LLVMStyle::printGroupSections(const ELFO *Obj) { W.startLine() << "There are no group sections in the file.\n"; } -template void LLVMStyle::printRelocations(const ELFO *Obj) { +template void LLVMStyle::printRelocations() { ListScope D(W, "Relocations"); - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (!isRelocationSec(Sec)) continue; - StringRef Name = this->getPrintableSectionName(Obj, Sec); - unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); + StringRef Name = 
this->getPrintableSectionName(Sec); + unsigned SecNdx = &Sec - &cantFail(this->Obj.sections()).front(); W.startLine() << "Section (" << SecNdx << ") " << Name << " {\n"; W.indent(); - this->printRelocationsHelper(Obj, Sec); + this->printRelocationsHelper(Sec); W.unindent(); W.startLine() << "}\n"; } } -template -void LLVMStyle::printRelReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rel &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, R, RelIndex, SymTab); -} - -template -void LLVMStyle::printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, R, RelIndex, SymTab); -} - template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { W.startLine() << W.hex(R) << "\n"; } template -template -void LLVMStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, - const RelTy &Rel, unsigned RelIndex, - const Elf_Shdr *SymTab) { - Expected> Target = - this->dumper()->getRelocationTarget(SymTab, Rel); +void LLVMStyle::printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { + Expected> Target = + this->dumper()->getRelocationTarget(R, SymTab); if (!Target) { this->reportUniqueWarning(createError( - "unable to print relocation " + Twine(RelIndex) + " in section " + - Twine(SecIndex) + ": " + toString(Target.takeError()))); + "unable to print relocation " + Twine(RelIndex) + " in " + + describe(this->Obj, Sec) + ": " + toString(Target.takeError()))); return; } - std::string TargetName = Target->second; + printRelRelaReloc(R, Target->Name); +} + +template +void LLVMStyle::printRelRelaReloc(const Relocation &R, + StringRef SymbolName) { SmallString<32> RelocName; - Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); + this->Obj.getRelocationTypeName(R.Type, RelocName); - uintX_t Addend = getAddend(Rel).getValueOr(0); + uintX_t Addend = R.Addend.getValueOr(0); if 
(opts::ExpandRelocs) { DictScope Group(W, "Relocation"); - W.printHex("Offset", Rel.r_offset); - W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL())); - W.printNumber("Symbol", !TargetName.empty() ? TargetName : "-", - Rel.getSymbol(Obj->isMips64EL())); + W.printHex("Offset", R.Offset); + W.printNumber("Type", RelocName, R.Type); + W.printNumber("Symbol", !SymbolName.empty() ? SymbolName : "-", R.Symbol); W.printHex("Addend", Addend); } else { raw_ostream &OS = W.startLine(); - OS << W.hex(Rel.r_offset) << " " << RelocName << " " - << (!TargetName.empty() ? TargetName : "-") << " " << W.hex(Addend) + OS << W.hex(R.Offset) << " " << RelocName << " " + << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Addend) << "\n"; } } -template -void LLVMStyle::printSectionHeaders(const ELFO *Obj) { +template void LLVMStyle::printSectionHeaders() { ListScope SectionsD(W, "Sections"); int SectionIndex = -1; std::vector> FlagsList = - getSectionFlagsForTarget(Obj->getHeader()->e_machine); - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + getSectionFlagsForTarget(this->Obj.getHeader()->e_machine); + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); - W.printNumber("Name", this->getPrintableSectionName(Obj, Sec), Sec.sh_name); - W.printHex( - "Type", - object::getELFSectionTypeName(Obj->getHeader()->e_machine, Sec.sh_type), - Sec.sh_type); + W.printNumber("Name", this->getPrintableSectionName(Sec), Sec.sh_name); + W.printHex("Type", + object::getELFSectionTypeName(this->Obj.getHeader()->e_machine, + Sec.sh_type), + Sec.sh_type); W.printFlags("Flags", Sec.sh_flags, makeArrayRef(FlagsList)); W.printHex("Address", Sec.sh_addr); W.printHex("Offset", Sec.sh_offset); @@ -6288,32 +6206,33 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { if (opts::SectionRelocations) { ListScope D(W, "Relocations"); - this->printRelocationsHelper(Obj, Sec); + 
this->printRelocationsHelper(Sec); } if (opts::SectionSymbols) { ListScope D(W, "Symbols"); if (const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec()) { StringRef StrTable = unwrapOrError( - this->FileName, Obj->getStringTableForSymtab(*Symtab)); + this->FileName, this->Obj.getStringTableForSymtab(*Symtab)); for (const Elf_Sym &Sym : - unwrapOrError(this->FileName, Obj->symbols(Symtab))) { - const Elf_Shdr *SymSec = unwrapOrError( - this->FileName, - Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable())); + unwrapOrError(this->FileName, this->Obj.symbols(Symtab))) { + const Elf_Shdr *SymSec = + unwrapOrError(this->FileName, + this->Obj.getSection( + &Sym, Symtab, this->dumper()->getShndxTable())); if (SymSec == &Sec) - printSymbol( - Obj, &Sym, - unwrapOrError(this->FileName, Obj->symbols(Symtab)).begin(), - StrTable, false, false); + printSymbol(&Sym, + unwrapOrError(this->FileName, this->Obj.symbols(Symtab)) + .begin(), + StrTable, false, false); } } } if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) { ArrayRef Data = - unwrapOrError(this->FileName, Obj->getSectionContents(&Sec)); + unwrapOrError(this->FileName, this->Obj.getSectionContents(&Sec)); W.printBinaryBlock( "SectionData", StringRef(reinterpret_cast(Data.data()), Data.size())); @@ -6351,8 +6270,7 @@ void LLVMStyle::printSymbolSection(const Elf_Sym *Symbol, } template -void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, - const Elf_Sym *First, +void LLVMStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/) { std::string FullSymbolName = @@ -6364,7 +6282,7 @@ void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, W.printHex("Value", Symbol->st_value); W.printNumber("Size", Symbol->st_size); W.printEnum("Binding", Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); - if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU 
&& SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) W.printEnum("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -6376,7 +6294,7 @@ void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, else { std::vector> SymOtherFlags(std::begin(ElfSymOtherFlags), std::end(ElfSymOtherFlags)); - if (Obj->getHeader()->e_machine == EM_MIPS) { + if (this->Obj.getHeader()->e_machine == EM_MIPS) { // Someones in their infinite wisdom decided to make STO_MIPS_MIPS16 // flag overlapped with other ST_MIPS_xxx flags. So consider both // cases separately. @@ -6395,33 +6313,32 @@ void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, } template -void LLVMStyle::printSymbols(const ELFO *Obj, bool PrintSymbols, +void LLVMStyle::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { if (PrintSymbols) - printSymbols(Obj); + printSymbols(); if (PrintDynamicSymbols) - printDynamicSymbols(Obj); + printDynamicSymbols(); } -template void LLVMStyle::printSymbols(const ELFO *Obj) { +template void LLVMStyle::printSymbols() { ListScope Group(W, "Symbols"); this->dumper()->printSymbolsHelper(false); } -template -void LLVMStyle::printDynamicSymbols(const ELFO *Obj) { +template void LLVMStyle::printDynamicSymbols() { ListScope Group(W, "DynamicSymbols"); this->dumper()->printSymbolsHelper(true); } -template void LLVMStyle::printDynamic(const ELFFile *Obj) { +template void LLVMStyle::printDynamic() { Elf_Dyn_Range Table = this->dumper()->dynamic_table(); if (Table.empty()) return; W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n"; - size_t MaxTagSize = getMaxDynamicTagSize(Obj, Table); + size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table); // The "Name/Value" column should be indented from the "Type" column by N // spaces, where N = MaxTagSize - length of "Type" (4) + trailing // space (1) = -3. 
@@ -6435,84 +6352,40 @@ template void LLVMStyle::printDynamic(const ELFFile *Ob W.startLine() << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10, true) << " " << format(ValueFmt.c_str(), - Obj->getDynamicTagAsString(Tag).c_str()) + this->Obj.getDynamicTagAsString(Tag).c_str()) << Value << "\n"; } W.startLine() << "]\n"; } -template -void LLVMStyle::printDynamicRelocations(const ELFO *Obj) { - const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); - const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); - const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); - const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); - +template void LLVMStyle::printDynamicRelocations() { W.startLine() << "Dynamic Relocations {\n"; W.indent(); - if (DynRelaRegion.Size > 0) { - for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Obj, Rela); - } - if (DynRelRegion.Size > 0) { - for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Obj, Rel); - } - - if (DynRelrRegion.Size > 0) { - Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &R : Obj->decode_relrs(Relrs)) - printDynamicRelocation(Obj, R); - } - if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) - for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rela); - else - for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rel); - + this->printDynamicRelocationsHelper(); W.unindent(); W.startLine() << "}\n"; } template - template -void LLVMStyle::printDynamicRelocation(const ELFO *Obj, const RelTy& Rel) { - SmallString<32> RelocName; - Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); - std::string SymbolName = - getSymbolForReloc(Obj, this->FileName, this->dumper(), Rel).Name; - - uintX_t Addend = getAddend(Rel).getValueOr(0); - if (opts::ExpandRelocs) { - DictScope Group(W, "Relocation"); - 
W.printHex("Offset", Rel.r_offset); - W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL())); - W.printString("Symbol", !SymbolName.empty() ? SymbolName : "-"); - W.printHex("Addend", Addend); - } else { - raw_ostream &OS = W.startLine(); - OS << W.hex(Rel.r_offset) << " " << RelocName << " " - << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Addend) - << "\n"; - } +void LLVMStyle::printDynamicReloc(const Relocation &R) { + RelSymbol S = + getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R); + printRelRelaReloc(R, S.Name); } template void LLVMStyle::printProgramHeaders( - const ELFO *Obj, bool PrintProgramHeaders, - cl::boolOrDefault PrintSectionMapping) { + bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) - printProgramHeaders(Obj); + printProgramHeaders(); if (PrintSectionMapping == cl::BOU_TRUE) - printSectionMapping(Obj); + printSectionMapping(); } -template -void LLVMStyle::printProgramHeaders(const ELFO *Obj) { +template void LLVMStyle::printProgramHeaders() { ListScope L(W, "ProgramHeaders"); - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError("unable to dump program headers: " + toString(PhdrsOrErr.takeError()))); @@ -6522,7 +6395,7 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { for (const Elf_Phdr &Phdr : *PhdrsOrErr) { DictScope P(W, "ProgramHeader"); StringRef Type = - segmentTypeToString(Obj->getHeader()->e_machine, Phdr.p_type); + segmentTypeToString(this->Obj.getHeader()->e_machine, Phdr.p_type); W.printHex("Type", Type.empty() ? 
"Unknown" : Type, Phdr.p_type); W.printHex("Offset", Phdr.p_offset); @@ -6536,8 +6409,7 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { } template -void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void LLVMStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { ListScope SS(W, "VersionSymbols"); if (!Sec) return; @@ -6557,8 +6429,8 @@ void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, for (size_t I = 0, E = Syms.size(); I < E; ++I) { DictScope S(W, "Symbol"); W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION); - W.printString("Name", this->dumper()->getFullSymbolName( - &Syms[I], StrTable, /*IsDynamic=*/true)); + W.printString("Name", this->dumper()->getFullSymbolName(&Syms[I], StrTable, + /*IsDynamic=*/true)); } } @@ -6568,8 +6440,7 @@ static const EnumEntry SymVersionFlags[] = { {"Info", "INFO", VER_FLG_INFO}}; template -void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void LLVMStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionDefinitions"); if (!Sec) return; @@ -6594,8 +6465,7 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, } template -void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void LLVMStyle::printVersionDependencySection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionRequirements"); if (!Sec) return; @@ -6624,19 +6494,17 @@ void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, } } -template -void LLVMStyle::printHashHistograms(const ELFFile *Obj) { +template void LLVMStyle::printHashHistograms() { W.startLine() << "Hash Histogram not implemented!\n"; } -template -void LLVMStyle::printCGProfile(const ELFFile *Obj) { +template void LLVMStyle::printCGProfile() { ListScope L(W, "CGProfile"); if (!this->dumper()->getDotCGProfileSec()) return; Expected> CGProfileOrErr = - Obj->template getSectionContentsAsArray( + 
this->Obj.template getSectionContentsAsArray( this->dumper()->getDotCGProfileSec()); if (!CGProfileOrErr) { this->reportUniqueWarning( @@ -6670,14 +6538,13 @@ static Expected> toULEB128Array(ArrayRef Data) { return Ret; } -template -void LLVMStyle::printAddrsig(const ELFFile *Obj) { +template void LLVMStyle::printAddrsig() { ListScope L(W, "Addrsig"); if (!this->dumper()->getDotAddrsigSec()) return; ArrayRef Contents = unwrapOrError( this->FileName, - Obj->getSectionContents(this->dumper()->getDotAddrsigSec())); + this->Obj.getSectionContents(this->dumper()->getDotAddrsigSec())); Expected> V = toULEB128Array(Contents); if (!V) { reportWarning(V.takeError(), this->FileName); @@ -6737,8 +6604,7 @@ static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) { } } -template -void LLVMStyle::printNotes(const ELFFile *Obj) { +template void LLVMStyle::printNotes() { ListScope L(W, "Notes"); auto PrintHeader = [&](Optional SecName, @@ -6767,7 +6633,7 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { } else if (Name == "AMDGPU") { W.printString("Type", getAMDGPUNoteTypeName(Type)); } else { - StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + StringRef NoteType = this->Obj.getHeader()->e_type == ELF::ET_CORE ? 
getCoreNoteTypeName(Type) : getGenericNoteTypeName(Type); if (!NoteType.empty()) @@ -6805,22 +6671,22 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { } }; - ArrayRef Sections = cantFail(Obj->sections()); - if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + ArrayRef Sections = cantFail(this->Obj.sections()); + if (this->Obj.getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { for (const auto &S : Sections) { if (S.sh_type != SHT_NOTE) continue; DictScope D(W, "NoteSection"); - PrintHeader(expectedToOptional(Obj->getSectionName(&S)), S.sh_offset, + PrintHeader(expectedToOptional(this->Obj.getSectionName(&S)), S.sh_offset, S.sh_size); Error Err = Error::success(); - for (auto Note : Obj->notes(S, Err)) + for (auto Note : this->Obj.notes(S, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); } } else { - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError( "unable to read program headers to locate the PT_NOTE segment: " + @@ -6834,7 +6700,7 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { DictScope D(W, "NoteSection"); PrintHeader(/*SecName=*/None, P.p_offset, P.p_filesz); Error Err = Error::success(); - for (auto Note : Obj->notes(P, Err)) + for (auto Note : this->Obj.notes(P, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); @@ -6842,17 +6708,17 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { } } -template -void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { +template void LLVMStyle::printELFLinkerOptions() { ListScope L(W, "LinkerOptions"); unsigned I = -1; - for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) { + for (const Elf_Shdr &Shdr : cantFail(this->Obj.sections())) { ++I; if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS) continue; - Expected> ContentsOrErr = Obj->getSectionContents(&Shdr); + Expected> ContentsOrErr = + 
this->Obj.getSectionContents(&Shdr); if (!ContentsOrErr) { this->reportUniqueWarning( createError("unable to read the content of the " @@ -6887,11 +6753,10 @@ void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { } } -template -void LLVMStyle::printDependentLibs(const ELFFile *Obj) { +template void LLVMStyle::printDependentLibs() { ListScope L(W, "DependentLibs"); this->printDependentLibsHelper( - Obj, [](const Elf_Shdr &) {}, + [](const Elf_Shdr &) {}, [this](StringRef Lib, uint64_t) { W.printString(Lib); }); } diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index ea28d98cbe275..e8def4ff9a6a4 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -67,3 +67,5 @@ def : Flag<["--"], "inlining=false">, Alias, HelpText<"Alias for --n // Compatibility aliases for pprof's symbolizer. def : Flag<["-"], "demangle=true">, Alias, HelpText<"Alias for --demangle">; def : Flag<["-"], "demangle=false">, Alias, HelpText<"Alias for --no-demangle">; +// Compatibility no-op options. +def : Flag<["--"], "use-symbol-table=true">; diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 632ec1bc9af41..9f524479bb04c 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -197,12 +197,22 @@ bool ELFDumper::shouldPrintSection(const ELFYAML::Section &S, // entry but their section headers may have special flags, entry size, address // alignment, etc. We will preserve the header for them under such // circumstances. 
- if (DWARF && DWARF->getNonEmptySectionNames().count(S.Name.substr(1))) { + StringRef SecName = S.Name.substr(1); + if (DWARF && DWARF->getNonEmptySectionNames().count(SecName)) { if (const ELFYAML::RawContentSection *RawSec = - dyn_cast(&S)) - return RawSec->Type != ELF::SHT_PROGBITS || RawSec->Flags || - !RawSec->Link.empty() || RawSec->Info || - RawSec->AddressAlign != 1 || RawSec->EntSize; + dyn_cast(&S)) { + if (RawSec->Type != ELF::SHT_PROGBITS || !RawSec->Link.empty() || + RawSec->Info || RawSec->AddressAlign != 1 || RawSec->Address || + RawSec->EntSize) + return true; + + ELFYAML::ELF_SHF ShFlags = RawSec->Flags.getValueOr(ELFYAML::ELF_SHF(0)); + + if (SecName == "debug_str") + return ShFlags != ELFYAML::ELF_SHF(ELF::SHF_MERGE | ELF::SHF_STRINGS); + + return ShFlags != 0; + } } // Normally we use "Symbols:" and "DynamicSymbols:" to describe contents of @@ -404,6 +414,8 @@ Optional ELFDumper::dumpDWARFSections( if (RawSec->Name == ".debug_aranges") Err = dumpDebugARanges(*DWARFCtx.get(), DWARF); + else if (RawSec->Name == ".debug_str") + dumpDebugStrings(*DWARFCtx.get(), DWARF); // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML. 
@@ -622,7 +634,8 @@ Error ELFDumper::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab, } template -static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType) { +static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType, + StringRef SecName) { switch (SecType) { case ELF::SHT_REL: return sizeof(typename ELFT::Rel); @@ -633,6 +646,8 @@ static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType) { case ELF::SHT_DYNAMIC: return sizeof(typename ELFT::Dyn); default: + if (SecName == ".debug_str") + return 1; return 0; } } @@ -649,9 +664,6 @@ Error ELFDumper::dumpCommonSection(const Elf_Shdr *Shdr, S.Address = static_cast(Shdr->sh_addr); S.AddressAlign = Shdr->sh_addralign; - if (Shdr->sh_entsize != getDefaultShEntSize(S.Type)) - S.EntSize = static_cast(Shdr->sh_entsize); - S.OriginalSecNdx = Shdr - &Sections[0]; auto NameOrErr = getUniquedSectionName(Shdr); @@ -659,6 +671,9 @@ Error ELFDumper::dumpCommonSection(const Elf_Shdr *Shdr, return NameOrErr.takeError(); S.Name = NameOrErr.get(); + if (Shdr->sh_entsize != getDefaultShEntSize(S.Type, S.Name)) + S.EntSize = static_cast(Shdr->sh_entsize); + if (Shdr->sh_link != ELF::SHN_UNDEF) { auto LinkSection = Obj.getSection(Shdr->sh_link); if (!LinkSection) diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index f7623da2c055e..584ef65b20bce 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include 
"llvm/Support/TargetSelect.h" diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp index 30ff37536fafc..faf6f7087ac0c 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp @@ -719,9 +719,9 @@ TEST_F(AArch64GISelMITest, TestKnownBitsUMax) { KnownBits KnownUmax = Info.getKnownBits(CopyUMax); EXPECT_EQ(64u, KnownUmax.getBitWidth()); - EXPECT_EQ(0u, KnownUmax.Zero.getZExtValue()); + EXPECT_EQ(0xffu, KnownUmax.Zero.getZExtValue()); EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue()); - EXPECT_EQ(0u, KnownUmax.Zero.getZExtValue()); + EXPECT_EQ(0xffu, KnownUmax.Zero.getZExtValue()); EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue()); } diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index d4d9a05f8c4e2..ebbbae0af0936 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -282,3 +282,18 @@ TEST(ELFObjectFileTest, MachineTestForMSP430) { for (const DataForTest &D : generateData(ELF::EM_MSP430)) checkFormatAndArch(D, Formats[I++], Triple::msp430); } + +TEST(ELFObjectFileTest, MachineTestForCSKY) { + std::array Formats = {"elf32-csky", "elf32-csky", + "elf64-unknown", "elf64-unknown"}; + size_t I = 0; + for (const DataForTest &D : generateData(ELF::EM_CSKY)) + checkFormatAndArch(D, Formats[I++], Triple::csky); +} + + + +// ELF relative relocation type test. 
+TEST(ELFObjectFileTest, RelativeRelocationTypeTest) { + EXPECT_EQ(ELF::R_CKCORE_RELATIVE, getELFRelativeRelocationType(ELF::EM_CSKY)); +} diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index 694e5c4dcc712..89555a5881a53 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -103,13 +103,15 @@ TEST(KnownBitsTest, BinaryExhaustive) { unsigned Bits = 4; ForeachKnownBits(Bits, [&](const KnownBits &Known1) { ForeachKnownBits(Bits, [&](const KnownBits &Known2) { - KnownBits KnownAnd(Bits), KnownOr(Bits), KnownXor(Bits); + KnownBits KnownAnd(Bits); KnownAnd.Zero.setAllBits(); KnownAnd.One.setAllBits(); - KnownOr.Zero.setAllBits(); - KnownOr.One.setAllBits(); - KnownXor.Zero.setAllBits(); - KnownXor.One.setAllBits(); + KnownBits KnownOr(KnownAnd); + KnownBits KnownXor(KnownAnd); + KnownBits KnownUMax(KnownAnd); + KnownBits KnownUMin(KnownAnd); + KnownBits KnownSMax(KnownAnd); + KnownBits KnownSMin(KnownAnd); ForeachNumInKnownBits(Known1, [&](const APInt &N1) { ForeachNumInKnownBits(Known2, [&](const APInt &N2) { @@ -126,6 +128,22 @@ TEST(KnownBitsTest, BinaryExhaustive) { Res = N1 ^ N2; KnownXor.One &= Res; KnownXor.Zero &= ~Res; + + Res = APIntOps::umax(N1, N2); + KnownUMax.One &= Res; + KnownUMax.Zero &= ~Res; + + Res = APIntOps::umin(N1, N2); + KnownUMin.One &= Res; + KnownUMin.Zero &= ~Res; + + Res = APIntOps::smax(N1, N2); + KnownSMax.One &= Res; + KnownSMax.Zero &= ~Res; + + Res = APIntOps::smin(N1, N2); + KnownSMin.One &= Res; + KnownSMin.Zero &= ~Res; }); }); @@ -140,6 +158,22 @@ TEST(KnownBitsTest, BinaryExhaustive) { KnownBits ComputedXor = Known1 ^ Known2; EXPECT_EQ(KnownXor.Zero, ComputedXor.Zero); EXPECT_EQ(KnownXor.One, ComputedXor.One); + + KnownBits ComputedUMax = KnownBits::umax(Known1, Known2); + EXPECT_EQ(KnownUMax.Zero, ComputedUMax.Zero); + EXPECT_EQ(KnownUMax.One, ComputedUMax.One); + + KnownBits ComputedUMin = KnownBits::umin(Known1, Known2); + 
EXPECT_EQ(KnownUMin.Zero, ComputedUMin.Zero); + EXPECT_EQ(KnownUMin.One, ComputedUMin.One); + + KnownBits ComputedSMax = KnownBits::smax(Known1, Known2); + EXPECT_EQ(KnownSMax.Zero, ComputedSMax.Zero); + EXPECT_EQ(KnownSMax.One, ComputedSMax.One); + + KnownBits ComputedSMin = KnownBits::smin(Known1, Known2); + EXPECT_EQ(KnownSMin.Zero, ComputedSMin.Zero); + EXPECT_EQ(KnownSMin.One, ComputedSMin.One); }); }); } diff --git a/llvm/utils/KillTheDoctor/CMakeLists.txt b/llvm/utils/KillTheDoctor/CMakeLists.txt index 72d994fb9953a..53b90b8204249 100644 --- a/llvm/utils/KillTheDoctor/CMakeLists.txt +++ b/llvm/utils/KillTheDoctor/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_utility(KillTheDoctor ) target_link_libraries(KillTheDoctor + PRIVATE LLVMSupport psapi ) diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index 4d645799dbf65..bb3d69d046bef 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -81,6 +81,7 @@ static_library("AST") { "ExternalASTMerger.cpp", "ExternalASTSource.cpp", "FormatString.cpp", + "IgnoreExpr.cpp", "InheritViz.cpp", "Interp/ByteCodeEmitter.cpp", "Interp/ByteCodeExprGen.cpp", diff --git a/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn index 5c97bebf137f0..1078ec8f2f430 100644 --- a/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn @@ -89,6 +89,7 @@ target(libclang_target_type, "libclang") { "FatalErrorHandler.cpp", "Index_Internal.h", "Indexing.cpp", + "Rewrite.cpp", ] if (host_os == "mac") { ldflags = [ diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn index 77e2b6d218f76..5fab007153e49 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -64,9 +64,9 
@@ static_library("profile") { if (current_os != "win") { defines = [ - "COMPILER_RT_TARGET_HAS_ATOMICS", - "COMPILER_RT_TARGET_HAS_FCNTL_LCK", - "COMPILER_RT_TARGET_HAS_UNAME", + "COMPILER_RT_HAS_UNAME", + "COMPILER_RT_HAS_ATOMICS", + "COMPILER_RT_HAS_FCNTL_LCK", ] } } diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 0a45a48d9dcaf..e2f6c710496ec 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -128,6 +128,7 @@ static_library("CodeGen") { "MachineScheduler.cpp", "MachineSink.cpp", "MachineSizeOpts.cpp", + "MachineStableHash.cpp", "MachineStripDebug.cpp", "MachineTraceMetrics.cpp", "MachineVerifier.cpp", diff --git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn index 648646e4188d6..16669c4219c93 100644 --- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn @@ -86,7 +86,7 @@ write_cmake_config("llvm-lit") { values = [ "LLVM_SOURCE_DIR=" + rebase_path("//llvm", dir), - "Python3_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=/usr/bin/env $python_path", "BUILD_MODE=.", "LLVM_LIT_CONFIG_MAP=" + config_map, ] diff --git a/mlir/examples/standalone/standalone-translate/standalone-translate.cpp b/mlir/examples/standalone/standalone-translate/standalone-translate.cpp index 7ccad3b4ee2be..f2f0ac56360da 100644 --- a/mlir/examples/standalone/standalone-translate/standalone-translate.cpp +++ b/mlir/examples/standalone/standalone-translate/standalone-translate.cpp @@ -11,102 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/AsmState.h" -#include "mlir/IR/Diagnostics.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/InitAllDialects.h" #include "mlir/InitAllTranslations.h" -#include "mlir/Support/FileUtilities.h" #include 
"mlir/Support/LogicalResult.h" -#include "mlir/Support/ToolUtilities.h" #include "mlir/Translation.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/ToolOutputFile.h" #include "Standalone/StandaloneDialect.h" -static llvm::cl::opt inputFilename(llvm::cl::Positional, - llvm::cl::desc(""), - llvm::cl::init("-")); - -static llvm::cl::opt - outputFilename("o", llvm::cl::desc("Output filename"), - llvm::cl::value_desc("filename"), llvm::cl::init("-")); - -static llvm::cl::opt - splitInputFile("split-input-file", - llvm::cl::desc("Split the input file into pieces and " - "process each chunk independently"), - llvm::cl::init(false)); - -static llvm::cl::opt verifyDiagnostics( - "verify-diagnostics", - llvm::cl::desc("Check that emitted diagnostics match " - "expected-* lines on the corresponding line"), - llvm::cl::init(false)); - int main(int argc, char **argv) { mlir::registerAllTranslations(); // TODO: Register standalone translations here. - llvm::InitLLVM y(argc, argv); - - // Add flags for all the registered translations. - llvm::cl::opt - translationRequested("", llvm::cl::desc("Translation to perform"), - llvm::cl::Required); - mlir::registerAsmPrinterCLOptions(); - mlir::registerMLIRContextCLOptions(); - llvm::cl::ParseCommandLineOptions(argc, argv, "MLIR translation driver\n"); - - std::string errorMessage; - auto input = mlir::openInputFile(inputFilename, &errorMessage); - if (!input) { - llvm::errs() << errorMessage << "\n"; - return 1; - } - - auto output = mlir::openOutputFile(outputFilename, &errorMessage); - if (!output) { - llvm::errs() << errorMessage << "\n"; - return 1; - } - - // Processes the memory buffer with a new MLIRContext. 
- auto processBuffer = [&](std::unique_ptr ownedBuffer, - llvm::raw_ostream &os) { - mlir::MLIRContext context; - context.allowUnregisteredDialects(); - context.printOpOnDiagnostic(!verifyDiagnostics); - llvm::SourceMgr sourceMgr; - sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), llvm::SMLoc()); - - if (!verifyDiagnostics) { - mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); - return (*translationRequested)(sourceMgr, os, &context); - } - - // In the diagnostic verification flow, we ignore whether the translation - // failed (in most cases, it is expected to fail). Instead, we check if the - // diagnostics were produced as expected. - mlir::SourceMgrDiagnosticVerifierHandler sourceMgrHandler(sourceMgr, - &context); - (*translationRequested)(sourceMgr, os, &context); - return sourceMgrHandler.verify(); - }; - - if (splitInputFile) { - if (failed(mlir::splitAndProcessBuffer(std::move(input), processBuffer, - output->os()))) - return 1; - } else { - if (failed(processBuffer(std::move(input), output->os()))) - return 1; - } - - output->keep(); - return 0; + return failed( + mlir::mlirTranslateMain(argc, argv, "MLIR Translation Testing Tool")); } diff --git a/mlir/include/mlir-c/StandardTypes.h b/mlir/include/mlir-c/StandardTypes.h index ad28ea5467171..eacfe0d39b6aa 100644 --- a/mlir/include/mlir-c/StandardTypes.h +++ b/mlir/include/mlir-c/StandardTypes.h @@ -162,6 +162,11 @@ int mlirTypeIsAVector(MlirType type); * is owned by the context. */ MlirType mlirVectorTypeGet(intptr_t rank, int64_t *shape, MlirType elementType); +/** Same as "mlirVectorTypeGet" but returns a nullptr wrapping MlirType on + * illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirVectorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, MlirLocation loc); + /*============================================================================*/ /* Ranked / Unranked Tensor type. 
*/ /*============================================================================*/ @@ -180,10 +185,20 @@ int mlirTypeIsAUnrankedTensor(MlirType type); MlirType mlirRankedTensorTypeGet(intptr_t rank, int64_t *shape, MlirType elementType); +/** Same as "mlirRankedTensorTypeGet" but returns a nullptr wrapping MlirType on + * illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirRankedTensorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, MlirLocation loc); + /** Creates an unranked tensor type with the given element type in the same * context as the element type. The type is owned by the context. */ MlirType mlirUnrankedTensorTypeGet(MlirType elementType); +/** Same as "mlirUnrankedTensorTypeGet" but returns a nullptr wrapping MlirType + * on illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirUnrankedTensorTypeGetChecked(MlirType elementType, + MlirLocation loc); + /*============================================================================*/ /* Ranked / Unranked MemRef type. */ /*============================================================================*/ @@ -208,10 +223,23 @@ MlirType mlirMemRefTypeGet(MlirType elementType, intptr_t rank, int64_t *shape, MlirType mlirMemRefTypeContiguousGet(MlirType elementType, intptr_t rank, int64_t *shape, unsigned memorySpace); +/** Same as "mlirMemRefTypeContiguousGet" but returns a nullptr wrapping + * MlirType on illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirMemRefTypeContiguousGetChecked(MlirType elementType, intptr_t rank, + int64_t *shape, + unsigned memorySpace, + MlirLocation loc); + /** Creates an Unranked MemRef type with the given element type and in the given * memory space. The type is owned by the context of element type. 
*/ MlirType mlirUnrankedMemRefTypeGet(MlirType elementType, unsigned memorySpace); +/** Same as "mlirUnrankedMemRefTypeGet" but returns a nullptr wrapping + * MlirType on illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirUnrankedMemRefTypeGetChecked(MlirType elementType, + unsigned memorySpace, + MlirLocation loc); + /** Returns the number of affine layout maps in the given MemRef type. */ intptr_t mlirMemRefTypeGetNumAffineMaps(MlirType type); diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index 5dd10932981ba..b04498598b290 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -23,7 +23,6 @@ #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" #include "mlir/Conversion/SCFToStandard/SCFToStandard.h" #include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h" -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 6686e28658138..d4b478dbf4ed0 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -239,17 +239,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { let summary = "Convert operations from the shape dialect into the standard " "dialect"; let constructor = "mlir::createConvertShapeToStandardPass()"; - let dependentDialects = ["StandardOpsDialect"]; -} - -//===----------------------------------------------------------------------===// -// ShapeToSCF -//===----------------------------------------------------------------------===// - -def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { - let summary = "Convert operations from the shape dialect to the SCF dialect"; - let 
constructor = "mlir::createConvertShapeToSCFPass()"; - let dependentDialects = ["scf::SCFDialect"]; + let dependentDialects = ["StandardOpsDialect", "scf::SCFDialect"]; } //===----------------------------------------------------------------------===// @@ -358,7 +348,10 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> { let options = [ Option<"reassociateFPReductions", "reassociate-fp-reductions", "bool", /*default=*/"false", - "Allows llvm to reassociate floating-point reductions for speed"> + "Allows llvm to reassociate floating-point reductions for speed">, + Option<"enableIndexOptimizations", "enable-index-optimizations", + "bool", /*default=*/"false", + "Allows compiler to assume indices fit in 32-bit if that yields faster code"> ]; } diff --git a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h b/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h deleted file mode 100644 index f953f6e2ddf10..0000000000000 --- a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h +++ /dev/null @@ -1,27 +0,0 @@ -//===- ShapeToSCF.h - Conversion utils from Shape to SCF dialect ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ -#define MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ - -#include - -namespace mlir { - -class MLIRContext; -class FunctionPass; -class OwningRewritePatternList; - -void populateShapeToSCFConversionPatterns(OwningRewritePatternList &patterns, - MLIRContext *ctx); - -std::unique_ptr createConvertShapeToSCFPass(); - -} // namespace mlir - -#endif // MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ diff --git a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h index 82aa8287d90f3..81ffa63281357 100644 --- a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h +++ b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h @@ -22,8 +22,13 @@ class OperationPass; /// ConvertVectorToLLVM pass in include/mlir/Conversion/Passes.td struct LowerVectorToLLVMOptions { bool reassociateFPReductions = false; - LowerVectorToLLVMOptions &setReassociateFPReductions(bool r) { - reassociateFPReductions = r; + bool enableIndexOptimizations = false; + LowerVectorToLLVMOptions &setReassociateFPReductions(bool b) { + reassociateFPReductions = b; + return *this; + } + LowerVectorToLLVMOptions &setEnableIndexOptimizations(bool b) { + enableIndexOptimizations = b; return *this; } }; @@ -37,7 +42,8 @@ void populateVectorToLLVMMatrixConversionPatterns( /// Collect a set of patterns to convert from the Vector dialect to LLVM. void populateVectorToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, - bool reassociateFPReductions = false); + bool reassociateFPReductions = false, + bool enableIndexOptimizations = false); /// Create a pass to convert vector operations to the LLVMIR dialect. 
std::unique_ptr> createConvertVectorToLLVMPass( diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 288031c598ff4..0ae6267cb67cb 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -21,7 +21,8 @@ include "mlir/Interfaces/SideEffectInterfaces.td" // Type constraint accepting standard integers, indices and wrapped LLVM integer // types. def IntLikeOrLLVMInt : TypeConstraint< - Or<[AnySignlessInteger.predicate, Index.predicate, LLVMInt.predicate]>, + Or<[AnySignlessInteger.predicate, Index.predicate, + LLVM_AnyInteger.predicate]>, "integer, index or LLVM dialect equivalent">; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 1f0eb6aab58a1..10755a436115f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -17,6 +17,10 @@ include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +//===----------------------------------------------------------------------===// +// LLVM Dialect. +//===----------------------------------------------------------------------===// + def LLVM_Dialect : Dialect { let name = "llvm"; let cppNamespace = "LLVM"; @@ -38,34 +42,108 @@ def LLVM_Dialect : Dialect { }]; } -// LLVM IR type wrapped in MLIR. +//===----------------------------------------------------------------------===// +// LLVM dialect type constraints. +//===----------------------------------------------------------------------===// + +// LLVM dialect type. def LLVM_Type : DialectType()">, "LLVM dialect type">; -// Type constraint accepting only wrapped LLVM integer types. 
-def LLVMInt : TypeConstraint< - And<[LLVM_Type.predicate, - CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>, - "LLVM dialect integer">; +// Type constraint accepting LLVM integer types. +def LLVM_AnyInteger : Type< + CPred<"$_self.isa<::mlir::LLVM::LLVMIntegerType>()">, + "LLVM integer type">; + +// Type constraints accepting LLVM integer type of a specific width. +class LLVM_IntBase : + Type().getBitWidth() == " + # width>]>, + "LLVM " # width # "-bit integer type">, + BuildableType< + "::mlir::LLVM::LLVMIntegerType::get($_builder.getContext(), " + # width # ")">; + +def LLVM_i1 : LLVM_IntBase<1>; +def LLVM_i8 : LLVM_IntBase<8>; +def LLVM_i32 : LLVM_IntBase<32>; -def LLVMIntBase : TypeConstraint< +// Type constraint accepting LLVM primitive types, i.e. all types except void +// and function. +def LLVM_PrimitiveType : Type< And<[LLVM_Type.predicate, - CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>, - "LLVM dialect integer">; - -// Integer type of a specific width. -class LLVMI - : Type().isIntegerTy(" # width # ")">]>, - "LLVM dialect " # width # "-bit integer">, - BuildableType< - "::mlir::LLVM::LLVMType::getIntNTy($_builder.getContext()," - # width # ")">; - -def LLVMI1 : LLVMI<1>; + CPred<"!$_self.isa<::mlir::LLVM::LLVMVoidType, " + "::mlir::LLVM::LLVMFunctionType>()">]>, + "primitive LLVM type">; + +// Type constraint accepting any LLVM floating point type. +def LLVM_AnyFloat : Type< + CPred<"$_self.isa<::mlir::LLVM::LLVMBFloatType, " + "::mlir::LLVM::LLVMHalfType, " + "::mlir::LLVM::LLVMFloatType, " + "::mlir::LLVM::LLVMDoubleType>()">, + "floating point LLVM type">; + +// Type constraint accepting any LLVM pointer type. +def LLVM_AnyPointer : Type()">, + "LLVM pointer type">; + +// Type constraint accepting LLVM pointer type with an additional constraint +// on the element type. 
+class LLVM_PointerTo : Type< + And<[LLVM_AnyPointer.predicate, + SubstLeaves< + "$_self", + "$_self.cast<::mlir::LLVM::LLVMPointerType>().getElementType()", + pointee.predicate>]>, + "LLVM pointer to " # pointee.description>; + +// Type constraint accepting any LLVM structure type. +def LLVM_AnyStruct : Type()">, + "LLVM structure type">; + +// Type constraint accepting opaque LLVM structure type. +def LLVM_OpaqueStruct : Type< + And<[LLVM_AnyStruct.predicate, + CPred<"$_self.cast<::mlir::LLVM::LLVMStructType>().isOpaque()">]>>; + +// Type constraint accepting any LLVM type that can be loaded or stored, i.e. a +// type that has size (not void, function or opaque struct type). +def LLVM_LoadableType : Type< + And<[LLVM_PrimitiveType.predicate, Neg]>, + "LLVM type with size">; + +// Type constraint accepting any LLVM aggregate type, i.e. structure or array. +def LLVM_AnyAggregate : Type< + CPred<"$_self.isa<::mlir::LLVM::LLVMStructType, " + "::mlir::LLVM::LLVMArrayType>()">, + "LLVM aggregate type">; + +// Type constraint accepting any LLVM non-aggregate type, i.e. not structure or +// array. +def LLVM_AnyNonAggregate : Type, + "LLVM non-aggregate type">; + +// Type constraint accepting any LLVM vector type. +def LLVM_AnyVector : Type()">, + "LLVM vector type">; + +// Type constraint accepting an LLVM vector type with an additional constraint +// on the vector element type. +class LLVM_VectorOf : Type< + And<[LLVM_AnyVector.predicate, + SubstLeaves< + "$_self", + "$_self.cast<::mlir::LLVM::LLVMVectorType>().getElementType()", + element.predicate>]>, + "LLVM vector of " # element.description>; + +// Type constraint accepting a constrained type, or a vector of such types. +class LLVM_ScalarOrVectorOf : + AnyTypeOf<[element, LLVM_VectorOf]>; // Base class for LLVM operations. Defines the interface to the llvm::IRBuilder // used to translate to LLVM IR proper. 
@@ -85,6 +163,10 @@ class LLVM_OpBase traits = []> : string llvmBuilder = ""; } +//===----------------------------------------------------------------------===// +// Base classes for LLVM dialect operations. +//===----------------------------------------------------------------------===// + // Base class for LLVM operations. All operations get an "llvm." prefix in // their name automatically. LLVM operations have either zero or one result, // this class is specialized below for both cases and should not be used diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index b1dd7b1af0301..b5bf4ac779727 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -87,39 +87,50 @@ class LLVM_TerminatorOp traits = []> : LLVM_Op; // Class for arithmetic binary operations. -class LLVM_ArithmeticOp traits = []> : +class LLVM_ArithmeticOpBase traits = []> : LLVM_OneResultOp, - Arguments<(ins LLVM_Type:$lhs, LLVM_Type:$rhs)>, + Arguments<(ins LLVM_ScalarOrVectorOf:$lhs, + LLVM_ScalarOrVectorOf:$rhs)>, LLVM_Builder<"$res = builder." # builderFunc # "($lhs, $rhs);"> { - let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; + let parser = + [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; let printer = [{ mlir::impl::printOneResultOp(this->getOperation(), p); }]; } -class LLVM_UnaryArithmeticOp traits = []> : +class LLVM_IntArithmeticOp traits = []> : + LLVM_ArithmeticOpBase; +class LLVM_FloatArithmeticOp traits = []> : + LLVM_ArithmeticOpBase; + +// Class for arithmetic unary operations. +class LLVM_UnaryArithmeticOp traits = []> : LLVM_OneResultOp, - Arguments<(ins LLVM_Type:$operand)>, + Arguments<(ins type:$operand)>, LLVM_Builder<"$res = builder." 
# builderFunc # "($operand);"> { - let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; + let parser = + [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; let printer = [{ mlir::impl::printOneResultOp(this->getOperation(), p); }]; } // Integer binary operations. -def LLVM_AddOp : LLVM_ArithmeticOp<"add", "CreateAdd", [Commutative]>; -def LLVM_SubOp : LLVM_ArithmeticOp<"sub", "CreateSub">; -def LLVM_MulOp : LLVM_ArithmeticOp<"mul", "CreateMul", [Commutative]>; -def LLVM_UDivOp : LLVM_ArithmeticOp<"udiv", "CreateUDiv">; -def LLVM_SDivOp : LLVM_ArithmeticOp<"sdiv", "CreateSDiv">; -def LLVM_URemOp : LLVM_ArithmeticOp<"urem", "CreateURem">; -def LLVM_SRemOp : LLVM_ArithmeticOp<"srem", "CreateSRem">; -def LLVM_AndOp : LLVM_ArithmeticOp<"and", "CreateAnd">; -def LLVM_OrOp : LLVM_ArithmeticOp<"or", "CreateOr">; -def LLVM_XOrOp : LLVM_ArithmeticOp<"xor", "CreateXor">; -def LLVM_ShlOp : LLVM_ArithmeticOp<"shl", "CreateShl">; -def LLVM_LShrOp : LLVM_ArithmeticOp<"lshr", "CreateLShr">; -def LLVM_AShrOp : LLVM_ArithmeticOp<"ashr", "CreateAShr">; +def LLVM_AddOp : LLVM_IntArithmeticOp<"add", "CreateAdd", [Commutative]>; +def LLVM_SubOp : LLVM_IntArithmeticOp<"sub", "CreateSub">; +def LLVM_MulOp : LLVM_IntArithmeticOp<"mul", "CreateMul", [Commutative]>; +def LLVM_UDivOp : LLVM_IntArithmeticOp<"udiv", "CreateUDiv">; +def LLVM_SDivOp : LLVM_IntArithmeticOp<"sdiv", "CreateSDiv">; +def LLVM_URemOp : LLVM_IntArithmeticOp<"urem", "CreateURem">; +def LLVM_SRemOp : LLVM_IntArithmeticOp<"srem", "CreateSRem">; +def LLVM_AndOp : LLVM_IntArithmeticOp<"and", "CreateAnd">; +def LLVM_OrOp : LLVM_IntArithmeticOp<"or", "CreateOr">; +def LLVM_XOrOp : LLVM_IntArithmeticOp<"xor", "CreateXor">; +def LLVM_ShlOp : LLVM_IntArithmeticOp<"shl", "CreateShl">; +def LLVM_LShrOp : LLVM_IntArithmeticOp<"lshr", "CreateLShr">; +def LLVM_AShrOp : LLVM_IntArithmeticOp<"ashr", "CreateAShr">; // Predicate for integer comparisons. 
def ICmpPredicateEQ : I64EnumAttrCase<"eq", 0>; @@ -143,8 +154,9 @@ def ICmpPredicate : I64EnumAttr< // Other integer operations. def LLVM_ICmpOp : LLVM_OneResultOp<"icmp", [NoSideEffect]>, - Arguments<(ins ICmpPredicate:$predicate, LLVM_Type:$lhs, - LLVM_Type:$rhs)> { + Arguments<(ins ICmpPredicate:$predicate, + LLVM_ScalarOrVectorOf:$lhs, + LLVM_ScalarOrVectorOf:$rhs)> { let llvmBuilder = [{ $res = builder.CreateICmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; @@ -189,8 +201,9 @@ def FCmpPredicate : I64EnumAttr< // Other integer operations. def LLVM_FCmpOp : LLVM_OneResultOp<"fcmp", [NoSideEffect]>, - Arguments<(ins FCmpPredicate:$predicate, LLVM_Type:$lhs, - LLVM_Type:$rhs)> { + Arguments<(ins FCmpPredicate:$predicate, + LLVM_ScalarOrVectorOf:$lhs, + LLVM_ScalarOrVectorOf:$rhs)> { let llvmBuilder = [{ $res = builder.CreateFCmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; @@ -205,12 +218,13 @@ def LLVM_FCmpOp : LLVM_OneResultOp<"fcmp", [NoSideEffect]>, } // Floating point binary operations. -def LLVM_FAddOp : LLVM_ArithmeticOp<"fadd", "CreateFAdd">; -def LLVM_FSubOp : LLVM_ArithmeticOp<"fsub", "CreateFSub">; -def LLVM_FMulOp : LLVM_ArithmeticOp<"fmul", "CreateFMul">; -def LLVM_FDivOp : LLVM_ArithmeticOp<"fdiv", "CreateFDiv">; -def LLVM_FRemOp : LLVM_ArithmeticOp<"frem", "CreateFRem">; -def LLVM_FNegOp : LLVM_UnaryArithmeticOp<"fneg", "CreateFNeg">; +def LLVM_FAddOp : LLVM_FloatArithmeticOp<"fadd", "CreateFAdd">; +def LLVM_FSubOp : LLVM_FloatArithmeticOp<"fsub", "CreateFSub">; +def LLVM_FMulOp : LLVM_FloatArithmeticOp<"fmul", "CreateFMul">; +def LLVM_FDivOp : LLVM_FloatArithmeticOp<"fdiv", "CreateFDiv">; +def LLVM_FRemOp : LLVM_FloatArithmeticOp<"frem", "CreateFRem">; +def LLVM_FNegOp : LLVM_UnaryArithmeticOp, + "fneg", "CreateFNeg">; // Common code definition that is used to verify and set the alignment attribute // of LLVM ops that accept such an attribute. 
@@ -241,7 +255,8 @@ class MemoryOpWithAlignmentAndAttributes : MemoryOpWithAlignmentBase { def LLVM_AllocaOp : MemoryOpWithAlignmentBase, LLVM_OneResultOp<"alloca">, - Arguments<(ins LLVM_Type:$arraySize, OptionalAttr:$alignment)> { + Arguments<(ins LLVM_AnyInteger:$arraySize, + OptionalAttr:$alignment)> { string llvmBuilder = [{ auto *inst = builder.CreateAlloca( $_resultType->getPointerElementType(), $arraySize); @@ -259,8 +274,11 @@ def LLVM_AllocaOp : let parser = [{ return parseAllocaOp(parser, result); }]; let printer = [{ printAllocaOp(p, *this); }]; } + def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$base, Variadic:$indices)>, + Arguments<(ins LLVM_ScalarOrVectorOf:$base, + Variadic>:$indices)>, LLVM_Builder<"$res = builder.CreateGEP($base, $indices);"> { let assemblyFormat = [{ $base `[` $indices `]` attr-dict `:` functional-type(operands, results) @@ -269,7 +287,7 @@ def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, def LLVM_LoadOp : MemoryOpWithAlignmentAndAttributes, LLVM_OneResultOp<"load">, - Arguments<(ins LLVM_Type:$addr, + Arguments<(ins LLVM_PointerTo:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, UnitAttr:$nontemporal)> { @@ -296,8 +314,8 @@ def LLVM_LoadOp : def LLVM_StoreOp : MemoryOpWithAlignmentAndAttributes, LLVM_ZeroResultOp<"store">, - Arguments<(ins LLVM_Type:$value, - LLVM_Type:$addr, + Arguments<(ins LLVM_LoadableType:$value, + LLVM_PointerTo:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, UnitAttr:$nontemporal)> { @@ -314,28 +332,41 @@ def LLVM_StoreOp : } // Casts. -class LLVM_CastOp traits = []> : LLVM_OneResultOp, - Arguments<(ins LLVM_Type:$arg)>, + Arguments<(ins type:$arg)>, LLVM_Builder<"$res = builder." 
# builderFunc # "($arg, $_resultType);"> { let parser = [{ return mlir::impl::parseCastOp(parser, result); }]; let printer = [{ mlir::impl::printCastOp(this->getOperation(), p); }]; } -def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "CreateBitCast">; -def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "CreateAddrSpaceCast">; -def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "CreateIntToPtr">; -def LLVM_PtrToIntOp : LLVM_CastOp<"ptrtoint", "CreatePtrToInt">; -def LLVM_SExtOp : LLVM_CastOp<"sext", "CreateSExt">; -def LLVM_ZExtOp : LLVM_CastOp<"zext", "CreateZExt">; -def LLVM_TruncOp : LLVM_CastOp<"trunc", "CreateTrunc">; -def LLVM_SIToFPOp : LLVM_CastOp<"sitofp", "CreateSIToFP">; -def LLVM_UIToFPOp : LLVM_CastOp<"uitofp", "CreateUIToFP">; -def LLVM_FPToSIOp : LLVM_CastOp<"fptosi", "CreateFPToSI">; -def LLVM_FPToUIOp : LLVM_CastOp<"fptoui", "CreateFPToUI">; -def LLVM_FPExtOp : LLVM_CastOp<"fpext", "CreateFPExt">; -def LLVM_FPTruncOp : LLVM_CastOp<"fptrunc", "CreateFPTrunc">; +def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "CreateBitCast", + LLVM_AnyNonAggregate>; +def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "CreateAddrSpaceCast", + LLVM_ScalarOrVectorOf>; +def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "CreateIntToPtr", + LLVM_ScalarOrVectorOf>; +def LLVM_PtrToIntOp : LLVM_CastOp<"ptrtoint", "CreatePtrToInt", + LLVM_ScalarOrVectorOf>; +def LLVM_SExtOp : LLVM_CastOp<"sext", "CreateSExt", + LLVM_ScalarOrVectorOf>; +def LLVM_ZExtOp : LLVM_CastOp<"zext", "CreateZExt", + LLVM_ScalarOrVectorOf>; +def LLVM_TruncOp : LLVM_CastOp<"trunc", "CreateTrunc", + LLVM_ScalarOrVectorOf>; +def LLVM_SIToFPOp : LLVM_CastOp<"sitofp", "CreateSIToFP", + LLVM_ScalarOrVectorOf>; +def LLVM_UIToFPOp : LLVM_CastOp<"uitofp", "CreateUIToFP", + LLVM_ScalarOrVectorOf>; +def LLVM_FPToSIOp : LLVM_CastOp<"fptosi", "CreateFPToSI", + LLVM_ScalarOrVectorOf>; +def LLVM_FPToUIOp : LLVM_CastOp<"fptoui", "CreateFPToUI", + LLVM_ScalarOrVectorOf>; +def LLVM_FPExtOp : LLVM_CastOp<"fpext", "CreateFPExt", + 
LLVM_ScalarOrVectorOf>; +def LLVM_FPTruncOp : LLVM_CastOp<"fptrunc", "CreateFPTrunc", + LLVM_ScalarOrVectorOf>; // Call-related operations. def LLVM_InvokeOp : LLVM_Op<"invoke", [ @@ -404,8 +435,8 @@ def LLVM_CallOp : LLVM_Op<"call">, let printer = [{ printCallOp(p, *this); }]; } def LLVM_ExtractElementOp : LLVM_OneResultOp<"extractelement", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$vector, - LLVM_Type:$position)> { + Arguments<(ins LLVM_AnyVector:$vector, + LLVM_AnyInteger:$position)> { string llvmBuilder = [{ $res = builder.CreateExtractElement($vector, $position); }]; @@ -416,8 +447,8 @@ def LLVM_ExtractElementOp : LLVM_OneResultOp<"extractelement", [NoSideEffect]>, let printer = [{ printExtractElementOp(p, *this); }]; } def LLVM_ExtractValueOp : LLVM_OneResultOp<"extractvalue", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$container, - ArrayAttr:$position)> { + Arguments<(ins LLVM_AnyAggregate:$container, + ArrayAttr:$position)> { string llvmBuilder = [{ $res = builder.CreateExtractValue($container, extractPosition($position)); }]; @@ -425,8 +456,9 @@ def LLVM_ExtractValueOp : LLVM_OneResultOp<"extractvalue", [NoSideEffect]>, let printer = [{ printExtractValueOp(p, *this); }]; } def LLVM_InsertElementOp : LLVM_OneResultOp<"insertelement", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$vector, LLVM_Type:$value, - LLVM_Type:$position)> { + Arguments<(ins LLVM_AnyVector:$vector, + LLVM_PrimitiveType:$value, + LLVM_AnyInteger:$position)> { string llvmBuilder = [{ $res = builder.CreateInsertElement($vector, $value, $position); }]; @@ -434,8 +466,9 @@ def LLVM_InsertElementOp : LLVM_OneResultOp<"insertelement", [NoSideEffect]>, let printer = [{ printInsertElementOp(p, *this); }]; } def LLVM_InsertValueOp : LLVM_OneResultOp<"insertvalue", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$container, LLVM_Type:$value, - ArrayAttr:$position)> { + Arguments<(ins LLVM_AnyAggregate:$container, + LLVM_PrimitiveType:$value, + ArrayAttr:$position)> { string llvmBuilder = [{ $res = 
builder.CreateInsertValue($container, $value, extractPosition($position)); @@ -451,7 +484,7 @@ def LLVM_InsertValueOp : LLVM_OneResultOp<"insertvalue", [NoSideEffect]>, } def LLVM_ShuffleVectorOp : LLVM_OneResultOp<"shufflevector", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$v1, LLVM_Type:$v2, ArrayAttr:$mask)> { + Arguments<(ins LLVM_AnyVector:$v1, LLVM_AnyVector:$v2, ArrayAttr:$mask)> { string llvmBuilder = [{ SmallVector position = extractPosition($mask); SmallVector mask(position.begin(), position.end()); @@ -478,8 +511,9 @@ def LLVM_ShuffleVectorOp def LLVM_SelectOp : LLVM_OneResultOp<"select", [NoSideEffect, AllTypesMatch<["trueValue", "falseValue", "res"]>]>, - Arguments<(ins LLVM_Type:$condition, LLVM_Type:$trueValue, - LLVM_Type:$falseValue)>, + Arguments<(ins LLVM_ScalarOrVectorOf:$condition, + LLVM_Type:$trueValue, + LLVM_Type:$falseValue)>, LLVM_Builder< "$res = builder.CreateSelect($condition, $trueValue, $falseValue);"> { let builders = [OpBuilder< @@ -508,7 +542,7 @@ def LLVM_BrOp : LLVM_TerminatorOp<"br", def LLVM_CondBrOp : LLVM_TerminatorOp<"cond_br", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, NoSideEffect]> { - let arguments = (ins LLVMI1:$condition, + let arguments = (ins LLVM_i1:$condition, Variadic:$trueDestOperands, Variadic:$falseDestOperands, OptionalAttr:$branch_weights); @@ -1090,9 +1124,11 @@ def AtomicOrdering : I64EnumAttr< let cppNamespace = "::mlir::LLVM"; } +def LLVM_AtomicRMWType : AnyTypeOf<[LLVM_AnyFloat, LLVM_AnyInteger]>; + def LLVM_AtomicRMWOp : LLVM_Op<"atomicrmw">, - Arguments<(ins AtomicBinOp:$bin_op, LLVM_Type:$ptr, LLVM_Type:$val, - AtomicOrdering:$ordering)>, + Arguments<(ins AtomicBinOp:$bin_op, LLVM_PointerTo:$ptr, + LLVM_AtomicRMWType:$val, AtomicOrdering:$ordering)>, Results<(outs LLVM_Type:$res)> { let llvmBuilder = [{ $res = builder.CreateAtomicRMW(getLLVMAtomicBinOp($bin_op), $ptr, $val, @@ -1103,8 +1139,11 @@ def LLVM_AtomicRMWOp : LLVM_Op<"atomicrmw">, let verifier = "return ::verify(*this);"; } +def 
LLVM_AtomicCmpXchgType : AnyTypeOf<[LLVM_AnyInteger, LLVM_AnyPointer]>; + def LLVM_AtomicCmpXchgOp : LLVM_Op<"cmpxchg">, - Arguments<(ins LLVM_Type:$ptr, LLVM_Type:$cmp, LLVM_Type:$val, + Arguments<(ins LLVM_PointerTo:$ptr, + LLVM_AtomicCmpXchgType:$cmp, LLVM_AtomicCmpXchgType:$val, AtomicOrdering:$success_ordering, AtomicOrdering:$failure_ordering)>, Results<(outs LLVM_Type:$res)> { diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index 78aefec00bf76..59ba50fbe2322 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -19,7 +19,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def SCF_Dialect : Dialect { let name = "scf"; - let cppNamespace = ""; + let cppNamespace = "scf"; } // Base class for SCF dialect ops. @@ -39,7 +39,7 @@ class SCF_Op traits = []> : def ForOp : SCF_Op<"for", [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, - SingleBlockImplicitTerminator<"YieldOp">, + SingleBlockImplicitTerminator<"scf::YieldOp">, RecursiveSideEffects]> { let summary = "for operation"; let description = [{ @@ -183,7 +183,7 @@ def ForOp : SCF_Op<"for", def IfOp : SCF_Op<"if", [DeclareOpInterfaceMethods, - SingleBlockImplicitTerminator<"YieldOp">, RecursiveSideEffects, + SingleBlockImplicitTerminator<"scf::YieldOp">, RecursiveSideEffects, NoRegionArguments]> { let summary = "if-then-else operation"; let description = [{ @@ -271,7 +271,7 @@ def ParallelOp : SCF_Op<"parallel", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, RecursiveSideEffects, - SingleBlockImplicitTerminator<"YieldOp">]> { + SingleBlockImplicitTerminator<"scf::YieldOp">]> { let summary = "parallel for operation"; let description = [{ The "scf.parallel" operation represents a loop nest taking 4 groups of SSA diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td index ab0b761613425..21f926a1500c5 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td 
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td @@ -247,6 +247,24 @@ def QueryCapabilityInterface : OpInterface<"QueryCapabilityInterface"> { "getCapabilities">]; } +//===----------------------------------------------------------------------===// +// SPIR-V target GPU vendor and device definitions +//===----------------------------------------------------------------------===// + +// An accelerator other than GPU or CPU +def SPV_DT_Other : I32EnumAttrCase<"Other", 0>; +def SPV_DT_IntegratedGPU : I32EnumAttrCase<"IntegratedGPU", 1>; +def SPV_DT_DiscreteGPU : I32EnumAttrCase<"DiscreteGPU", 2>; +def SPV_DT_CPU : I32EnumAttrCase<"CPU", 3>; +// Information missing. +def SPV_DT_Unknown : I32EnumAttrCase<"Unknown", 0x7FFFFFFF>; + +def SPV_DeviceTypeAttr : SPV_I32EnumAttr< + "DeviceType", "valid SPIR-V device types", [ + SPV_DT_Other, SPV_DT_IntegratedGPU, SPV_DT_DiscreteGPU, + SPV_DT_CPU, SPV_DT_Unknown + ]>; + //===----------------------------------------------------------------------===// // SPIR-V extension definitions //===----------------------------------------------------------------------===// @@ -3252,6 +3270,8 @@ def SPV_OC_OpCooperativeMatrixLoadNV : I32EnumAttrCase<"OpCooperativeMatrixLoa def SPV_OC_OpCooperativeMatrixStoreNV : I32EnumAttrCase<"OpCooperativeMatrixStoreNV", 5360>; def SPV_OC_OpCooperativeMatrixMulAddNV : I32EnumAttrCase<"OpCooperativeMatrixMulAddNV", 5361>; def SPV_OC_OpCooperativeMatrixLengthNV : I32EnumAttrCase<"OpCooperativeMatrixLengthNV", 5362>; +def SPV_OC_OpSubgroupBlockReadINTEL : I32EnumAttrCase<"OpSubgroupBlockReadINTEL", 5575>; +def SPV_OC_OpSubgroupBlockWriteINTEL : I32EnumAttrCase<"OpSubgroupBlockWriteINTEL", 5576>; def SPV_OpcodeAttr : SPV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", [ @@ -3308,7 +3328,8 @@ def SPV_OpcodeAttr : SPV_OC_OpGroupNonUniformFMax, SPV_OC_OpSubgroupBallotKHR, SPV_OC_OpTypeCooperativeMatrixNV, SPV_OC_OpCooperativeMatrixLoadNV, SPV_OC_OpCooperativeMatrixStoreNV, SPV_OC_OpCooperativeMatrixMulAddNV, 
- SPV_OC_OpCooperativeMatrixLengthNV + SPV_OC_OpCooperativeMatrixLengthNV, SPV_OC_OpSubgroupBlockReadINTEL, + SPV_OC_OpSubgroupBlockWriteINTEL ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td index 6064cc3043596..10cafd8251166 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td @@ -265,6 +265,108 @@ def SPV_GLSLTanOp : SPV_GLSLUnaryArithmeticOp<"Tan", 15, SPV_Float16or32> { // ----- +def SPV_GLSLAsinOp : SPV_GLSLUnaryArithmeticOp<"Asin", 16, SPV_Float16or32> { + let summary = "Arc Sine of operand in radians"; + + let description = [{ + The standard trigonometric arc sine of x radians. + + Result is an angle, in radians, whose sine is x. The range of result values + is [-Ï€ / 2, Ï€ / 2]. Result is undefined if abs x > 1. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + asin-op ::= ssa-id `=` `spv.GLSL.Asin` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Asin %0 : f32 + %3 = spv.GLSL.Asin %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPV_GLSLAcosOp : SPV_GLSLUnaryArithmeticOp<"Acos", 17, SPV_Float16or32> { + let summary = "Arc Cosine of operand in radians"; + + let description = [{ + The standard trigonometric arc cosine of x radians. + + Result is an angle, in radians, whose cosine is x. The range of result + values is [0, Ï€]. Result is undefined if abs x > 1. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. 
+ + Result Type and the type of x must be the same type. Results are computed + per component. + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + acos-op ::= ssa-id `=` `spv.GLSL.Acos` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Acos %0 : f32 + %3 = spv.GLSL.Acos %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPV_GLSLAtanOp : SPV_GLSLUnaryArithmeticOp<"Atan", 18, SPV_Float16or32> { + let summary = "Arc Tangent of operand in radians"; + + let description = [{ + The standard trigonometric arc tangent of x radians. + + Result is an angle, in radians, whose tangent is y_over_x. The range of + result values is [-Ï€ / 2, Ï€ / 2]. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + atan-op ::= ssa-id `=` `spv.GLSL.Atan` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Atan %0 : f32 + %3 = spv.GLSL.Atan %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLExpOp : SPV_GLSLUnaryArithmeticOp<"Exp", 27, SPV_Float16or32> { let summary = "Exponentiation of Operand 1"; @@ -328,6 +430,36 @@ def SPV_GLSLFloorOp : SPV_GLSLUnaryArithmeticOp<"Floor", 8, SPV_Float> { // ----- +def SPV_GLSLRoundOp: SPV_GLSLUnaryArithmeticOp<"Round", 1, SPV_Float> { + let summary = "Rounds to the whole number"; + + let description = [{ + Result is the value equal to the nearest whole number. + + The operand x must be a scalar or vector whose component type is + floating-point. 
+ + Result Type and the type of x must be the same type. Results are computed + per component. + + + ``` + float-scalar-vector-type ::= float-type | + `vector<` integer-literal `x` float-type `>` + round-op ::= ssa-id `=` `spv.GLSL.Round` ssa-use `:` + float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Round %0 : f32 + %3 = spv.GLSL.Round %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLInverseSqrtOp : SPV_GLSLUnaryArithmeticOp<"InverseSqrt", 32, SPV_Float> { let summary = "Reciprocal of sqrt(operand)"; @@ -513,6 +645,40 @@ def SPV_GLSLSMinOp : SPV_GLSLBinaryArithmeticOp<"SMin", 39, SPV_Integer> { // ----- +def SPV_GLSLPowOp : SPV_GLSLBinaryArithmeticOp<"Pow", 26, SPV_Float16or32> { + let summary = "Return x raised to the y power of two operands"; + + let description = [{ + Result is x raised to the y power; x^y. + + Result is undefined if x = 0 and y ≤ 0. + + The operands x and y must be a scalar or vector whose component type is + 16-bit or 32-bit floating-point. + + Result Type and the type of all operands must be the same type. Results are + computed per component. + + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + pow-op ::= ssa-id `=` `spv.GLSL.Pow` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Pow %0, %1 : f32 + %3 = spv.GLSL.Pow %0, %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLFSignOp : SPV_GLSLUnaryArithmeticOp<"FSign", 6, SPV_Float> { let summary = "Returns the sign of the operand"; @@ -602,6 +768,70 @@ def SPV_GLSLSqrtOp : SPV_GLSLUnaryArithmeticOp<"Sqrt", 31, SPV_Float> { // ----- +def SPV_GLSLSinhOp : SPV_GLSLUnaryArithmeticOp<"Sinh", 19, SPV_Float16or32> { + let summary = "Hyperbolic sine of operand in radians"; + + let description = [{ + Hyperbolic sine of x radians. 
+ + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. + + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + sinh-op ::= ssa-id `=` `spv.GLSL.Sinh` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Sinh %0 : f32 + %3 = spv.GLSL.Sinh %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPV_GLSLCoshOp : SPV_GLSLUnaryArithmeticOp<"Cosh", 20, SPV_Float16or32> { + let summary = "Hyperbolic cosine of operand in radians"; + + let description = [{ + Hyperbolic cosine of x radians. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. 
+ + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + cosh-op ::= ssa-id `=` `spv.GLSL.Cosh` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Cosh %0 : f32 + %3 = spv.GLSL.Cosh %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLTanhOp : SPV_GLSLUnaryArithmeticOp<"Tanh", 21, SPV_Float16or32> { let summary = "Hyperbolic tangent of operand in radians"; diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td index c9ce8be9927f1..7eab3b44601ed 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td @@ -88,7 +88,6 @@ def SPV_GroupBroadcastOp : SPV_Op<"GroupBroadcast", let assemblyFormat = [{ $execution_scope operands attr-dict `:` type($value) `,` type($localid) }]; - } // ----- @@ -147,4 +146,104 @@ def SPV_SubgroupBallotKHROp : SPV_Op<"SubgroupBallotKHR", []> { // ----- +def SPV_SubgroupBlockReadINTELOp : SPV_Op<"SubgroupBlockReadINTEL", []> { + let summary = "See extension SPV_INTEL_subgroups"; + + let description = [{ + Reads one or more components of Result data for each invocation in the + subgroup from the specified Ptr as a block operation. + + The data is read strided, so the first value read is: + Ptr[ SubgroupLocalInvocationId ] + + and the second value read is: + Ptr[ SubgroupLocalInvocationId + SubgroupMaxSize ] + etc. + + Result Type may be a scalar or vector type, and its component type must be + equal to the type pointed to by Ptr. + + The type of Ptr must be a pointer type, and must point to a scalar type. 
+ + + + ``` + subgroup-block-read-INTEL-op ::= ssa-id `=` `spv.SubgroupBlockReadINTEL` + storage-class ssa_use `:` spirv-element-type + ```mlir + + #### Example: + + ``` + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : i32 + ``` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_subgroups]>, + Capability<[SPV_C_SubgroupBufferBlockIOINTEL]> + ]; + + let arguments = (ins + SPV_AnyPtr:$ptr + ); + + let results = (outs + SPV_Type:$value + ); +} + +// ----- + +def SPV_SubgroupBlockWriteINTELOp : SPV_Op<"SubgroupBlockWriteINTEL", []> { + let summary = "See extension SPV_INTEL_subgroups"; + + let description = [{ + Writes one or more components of Data for each invocation in the subgroup + from the specified Ptr as a block operation. + + The data is written strided, so the first value is written to: + Ptr[ SubgroupLocalInvocationId ] + + and the second value written is: + Ptr[ SubgroupLocalInvocationId + SubgroupMaxSize ] + etc. + + The type of Ptr must be a pointer type, and must point to a scalar type. + + The component type of Data must be equal to the type pointed to by Ptr. 
+ + + + ``` + subgroup-block-write-INTEL-op ::= ssa-id `=` `spv.SubgroupBlockWriteINTEL` + storage-class ssa_use `,` ssa-use `:` spirv-element-type + ```mlir + + #### Example: + + ``` + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : i32 + ``` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_subgroups]>, + Capability<[SPV_C_SubgroupBufferBlockIOINTEL]> + ]; + + let arguments = (ins + SPV_AnyPtr:$ptr, + SPV_Type:$value + ); + + let results = (outs); +} + +// ----- + #endif // SPIRV_GROUP_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h index e276123c4bb54..5ef999d1b9fe1 100644 --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h @@ -29,6 +29,8 @@ class TargetEnv { public: explicit TargetEnv(TargetEnvAttr targetAttr); + DeviceType getDeviceType(); + Version getVersion(); /// Returns true if the given capability is allowed. diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td index 231ec54f09f43..af4da692c5de5 100644 --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td @@ -39,12 +39,37 @@ def SPV_CapabilityArrayAttr : TypedArrayAttrBase< // This attribute specifies the limits for various resources on the target // architecture. // -// See https://renderdoc.org/vkspec_chunked/chap36.html#limits for the complete -// list of limits and their explanation for the Vulkan API. The following ones -// are those affecting SPIR-V CodeGen. +// See https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#limits +// for the complete list of limits and their explanation for the Vulkan API. +// The following ones are those affecting SPIR-V CodeGen. 
Their default values +// are from the Vulkan limit requirements: +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#limits-minmax def SPV_ResourceLimitsAttr : StructAttr<"ResourceLimitsAttr", SPIRV_Dialect, [ - StructFieldAttr<"max_compute_workgroup_invocations", I32Attr>, - StructFieldAttr<"max_compute_workgroup_size", I32ElementsAttr> + // Unique identifier for the vendor and target GPU. + // 0x7FFFFFFF means unknown. + StructFieldAttr<"vendor_id", DefaultValuedAttr>, + StructFieldAttr<"device_id", DefaultValuedAttr>, + // Target device type. + StructFieldAttr<"device_type", + DefaultValuedAttr>, + + // The maximum total storage size, in bytes, available for variables + // declared with the Workgroup storage class. + StructFieldAttr<"max_compute_shared_memory_size", + DefaultValuedAttr>, + + // The maximum total number of compute shader invocations in a single local + // workgroup. + StructFieldAttr<"max_compute_workgroup_invocations", + DefaultValuedAttr>, + // The maximum size of a local compute workgroup, per dimension. + StructFieldAttr<"max_compute_workgroup_size", + DefaultValuedAttr>, + + // The default number of invocations in each subgroup. + // 0x7FFFFFFF means unknown. 
+ StructFieldAttr<"subgroup_size", DefaultValuedAttr> ]>; #endif // SPIRV_TARGET_AND_ABI diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index ae951e824e001..f326ae5578650 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1475,6 +1475,37 @@ def DivFOp : FloatArithmeticOp<"divf"> { let summary = "floating point division operation"; } +//===----------------------------------------------------------------------===// +// DynamicTensorFromElementsOp +//===----------------------------------------------------------------------===// + +def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements", + [RecursiveSideEffects, SingleBlockImplicitTerminator<"YieldOp">]> { + string summary = "Creates a dynamically sized tensor from elements"; + string description = [{ + This operation creates a dynamically sized tensor with elements of any type. + It expects one index operand per dynamic extent of the result tensor. + + The body region defines the tensor's elements. It takes index operands as + its region arguments that span the index space. The element at the given + position is yielded with the `yield` operation (see `YieldOp`). + + Example: + + ```mlir + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + ... 
+ yield %elem : f32 + } : tensor + ``` + }]; + + let arguments = (ins Variadic:$dynamicExtents); + let results = (outs AnyRankedTensor:$result); + let regions = (region SizedRegion<1>:$body); +} + //===----------------------------------------------------------------------===// // ExpOp //===----------------------------------------------------------------------===// @@ -3252,6 +3283,24 @@ def ViewOp : Std_Op<"view", [ let hasCanonicalizer = 1; } +//===----------------------------------------------------------------------===// +// YieldOp +//===----------------------------------------------------------------------===// + +def YieldOp : Std_Op<"yield", [NoSideEffect, ReturnLike, Terminator, + HasParent<"DynamicTensorFromElementsOp">]> { + let summary = "Yield a value from a region"; + let description = [{ + This operation is used to yield a single value from within a region. It + is used to create dynamically sized tensors + (see `DynamicTensorFromElementsOp`). + }]; + + let arguments = (ins AnyType:$value); + let assemblyFormat = "$value attr-dict `:` type($value)"; + let verifier = ?; +} + //===----------------------------------------------------------------------===// // XOrOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h index f353262750345..269d9c1b27af0 100644 --- a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h @@ -16,10 +16,16 @@ namespace intrinsics { using vector_broadcast = ValueBuilder; using vector_contract = ValueBuilder; -using vector_insert = ValueBuilder; -using vector_fma = ValueBuilder; using vector_extract = ValueBuilder; -using vector_extractelement = ValueBuilder; +using vector_extract_element = ValueBuilder; +using vector_extract_slices = ValueBuilder; +using vector_extract_strided_slice = + ValueBuilder; +using vector_fma = 
ValueBuilder; +using vector_insert = ValueBuilder; +using vector_insert_element = ValueBuilder; +using vector_insert_slices = ValueBuilder; +using vector_insert_strided_slice = ValueBuilder; using vector_matmul = ValueBuilder; using vector_outerproduct = ValueBuilder; using vector_print = OperationBuilder; @@ -27,11 +33,6 @@ using vector_transfer_read = ValueBuilder; using vector_transfer_write = OperationBuilder; using vector_transpose = ValueBuilder; using vector_type_cast = ValueBuilder; -using vector_extract_slices = ValueBuilder; -using vector_insert_slices = ValueBuilder; -using vector_extract_strided_slice = - ValueBuilder; -using vector_insert_strided_slice = ValueBuilder; } // namespace intrinsics } // namespace edsc diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 22fd036df8148..dceb850ad929c 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -348,15 +348,21 @@ def Vector_ExtractElementOp : %1 = vector.extractelement %0[%c : i32]: vector<16xf32> ``` }]; + let assemblyFormat = [{ + $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector) + }]; + + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + "int64_t position">, + OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + "Value position">]; let extraClassDeclaration = [{ VectorType getVectorType() { return vector().getType().cast(); } }]; - - let assemblyFormat = [{ - $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector) - }]; } def Vector_ExtractOp : @@ -508,6 +514,17 @@ def Vector_InsertElementOp : %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32> ``` }]; + let assemblyFormat = [{ + $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:` + type($result) + }]; + + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + 
"Value dest, int64_t position">, + OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + "Value dest, Value position">]; let extraClassDeclaration = [{ Type getSourceType() { return source().getType(); } VectorType getDestVectorType() { @@ -515,10 +532,6 @@ def Vector_InsertElementOp : } }]; - let assemblyFormat = [{ - $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:` - type($result) - }]; } def Vector_InsertOp : diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h index 1f21af617e4d2..70c948d99cda8 100644 --- a/mlir/include/mlir/EDSC/Builders.h +++ b/mlir/include/mlir/EDSC/Builders.h @@ -190,7 +190,7 @@ class TemplatedIndexedValue { TemplatedIndexedValue operator()(Value index, Args... indices) { return TemplatedIndexedValue(value, index).append(indices...); } - TemplatedIndexedValue operator()(ArrayRef indices) { + TemplatedIndexedValue operator()(ValueRange indices) { return TemplatedIndexedValue(value, indices); } @@ -319,7 +319,7 @@ class TemplatedIndexedValue { } private: - TemplatedIndexedValue(Value value, ArrayRef indices) + TemplatedIndexedValue(Value value, ValueRange indices) : value(value), indices(indices.begin(), indices.end()) {} TemplatedIndexedValue &append() { return *this; } diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index aa1cc0a1a2b47..0c30869752ea3 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -328,6 +328,20 @@ class OpBuilder : public Builder { setInsertionPoint(op->getBlock(), ++Block::iterator(op)); } + /// Sets the insertion point to the node after the specified value. If value + /// has a defining operation, sets the insertion point to the node after such + /// defining operation. This will cause subsequent insertions to go right + /// after it. Otherwise, value is a BlockArgument. Sets the insertion point to + /// the start of its block. 
+ void setInsertionPointAfter(Value val) { + if (Operation *op = val.getDefiningOp()) { + setInsertionPointAfter(op); + } else { + auto blockArg = val.cast(); + setInsertionPointToStart(blockArg.getOwner()); + } + } + /// Sets the insertion point to the start of the specified block. void setInsertionPointToStart(Block *block) { setInsertionPoint(block, block->begin()); diff --git a/mlir/include/mlir/IR/Identifier.h b/mlir/include/mlir/IR/Identifier.h index ca1946bd8ad02..353dbc9020875 100644 --- a/mlir/include/mlir/IR/Identifier.h +++ b/mlir/include/mlir/IR/Identifier.h @@ -67,6 +67,9 @@ class Identifier { return Identifier(static_cast(entry)); } + /// Compare the underlying StringRef. + int compare(Identifier rhs) const { return strref().compare(rhs.strref()); } + private: /// This contains the bytes of the string, which is guaranteed to be nul /// terminated. diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index b0e1205eefe6f..7fce4b808d2e4 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -82,7 +82,7 @@ class AbstractOperation { using OperationProperties = uint32_t; /// This is the name of the operation. - const StringRef name; + const Identifier name; /// This is the dialect that this operation belongs to. 
Dialect &dialect; @@ -171,13 +171,7 @@ class AbstractOperation { SmallVectorImpl &results), void (&getCanonicalizationPatterns)(OwningRewritePatternList &results, MLIRContext *context), - detail::InterfaceMap &&interfaceMap, bool (&hasTrait)(TypeID traitID)) - : name(name), dialect(dialect), typeID(typeID), - parseAssembly(parseAssembly), printAssembly(printAssembly), - verifyInvariants(verifyInvariants), foldHook(foldHook), - getCanonicalizationPatterns(getCanonicalizationPatterns), - opProperties(opProperties), interfaceMap(std::move(interfaceMap)), - hasRawTrait(hasTrait) {} + detail::InterfaceMap &&interfaceMap, bool (&hasTrait)(TypeID traitID)); /// The properties of the operation. const OperationProperties opProperties; @@ -302,9 +296,12 @@ class OperationName { /// Return the operation name with dialect name stripped, if it has one. StringRef stripDialect() const; - /// Return the name of this operation. This always succeeds. + /// Return the name of this operation. This always succeeds. StringRef getStringRef() const; + /// Return the name of this operation as an identifier. This always succeeds. + Identifier getIdentifier() const; + /// If this operation has a registered operation description, return it. /// Otherwise return null. const AbstractOperation *getAbstractOperation() const; diff --git a/mlir/include/mlir/Pass/PassInstrumentation.h b/mlir/include/mlir/Pass/PassInstrumentation.h index dc648b2b0edfb..baf230f086fd1 100644 --- a/mlir/include/mlir/Pass/PassInstrumentation.h +++ b/mlir/include/mlir/Pass/PassInstrumentation.h @@ -9,12 +9,12 @@ #ifndef MLIR_PASS_PASSINSTRUMENTATION_H_ #define MLIR_PASS_PASSINSTRUMENTATION_H_ +#include "mlir/IR/Identifier.h" #include "mlir/Support/LLVM.h" #include "mlir/Support/TypeID.h" namespace mlir { class Operation; -class OperationName; class Pass; namespace detail { @@ -43,13 +43,13 @@ class PassInstrumentation { /// A callback to run before a pass pipeline is executed. 
This function takes /// the name of the operation type being operated on, and information related /// to the parent that spawned this pipeline. - virtual void runBeforePipeline(const OperationName &name, + virtual void runBeforePipeline(Identifier name, const PipelineParentInfo &parentInfo) {} /// A callback to run after a pass pipeline has executed. This function takes /// the name of the operation type being operated on, and information related /// to the parent that spawned this pipeline. - virtual void runAfterPipeline(const OperationName &name, + virtual void runAfterPipeline(Identifier name, const PipelineParentInfo &parentInfo) {} /// A callback to run before a pass is executed. This function takes a pointer @@ -90,12 +90,12 @@ class PassInstrumentor { /// See PassInstrumentation::runBeforePipeline for details. void - runBeforePipeline(const OperationName &name, + runBeforePipeline(Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo); /// See PassInstrumentation::runAfterPipeline for details. void - runAfterPipeline(const OperationName &name, + runAfterPipeline(Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo); /// See PassInstrumentation::runBeforePass for details. diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index e19a1fab7f130..9aace79f2053f 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -26,9 +26,9 @@ class Any; namespace mlir { class AnalysisManager; +class Identifier; class MLIRContext; class ModuleOp; -class OperationName; class Operation; class Pass; class PassInstrumentation; @@ -47,7 +47,8 @@ struct OpPassManagerImpl; /// other OpPassManagers or the top-level PassManager. 
class OpPassManager { public: - OpPassManager(OperationName name, bool verifyPasses); + OpPassManager(Identifier name, bool verifyPasses); + OpPassManager(StringRef name, bool verifyPasses); OpPassManager(OpPassManager &&rhs); OpPassManager(const OpPassManager &rhs); ~OpPassManager(); @@ -70,7 +71,7 @@ class OpPassManager { /// Nest a new operation pass manager for the given operation kind under this /// pass manager. - OpPassManager &nest(const OperationName &nestedName); + OpPassManager &nest(Identifier nestedName); OpPassManager &nest(StringRef nestedName); template OpPassManager &nest() { return nest(OpT::getOperationName()); @@ -89,11 +90,11 @@ class OpPassManager { /// Returns the number of passes held by this manager. size_t size() const; - /// Return an instance of the context. - MLIRContext *getContext() const; + /// Return the operation name that this pass manager operates on. + Identifier getOpName(MLIRContext &context) const; /// Return the operation name that this pass manager operates on. - const OperationName &getOpName() const; + StringRef getOpName() const; /// Returns the internal implementation instance. detail::OpPassManagerImpl &getImpl(); @@ -151,6 +152,9 @@ class PassManager : public OpPassManager { LLVM_NODISCARD LogicalResult run(ModuleOp module); + /// Return an instance of the context. + MLIRContext *getContext() const { return context; } + /// Enable support for the pass manager to generate a reproducer on the event /// of a crash or a pass failure. `outputFile` is a .mlir filename used to /// write the generated reproducer. If `genLocalReproducer` is true, the pass @@ -304,6 +308,9 @@ class PassManager : public OpPassManager { runWithCrashRecovery(MutableArrayRef> passes, ModuleOp module, AnalysisManager am); + /// Context this PassManager was initialized with. + MLIRContext *context; + /// Flag that specifies if pass statistics should be dumped. 
Optional passStatisticsMode; diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h index 8fc254e6be1e2..b3db7794fd971 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -108,7 +108,7 @@ class BufferAssignmentTypeConverter : public TypeConverter { "Only the memref typed values can be set to be appended to the " "function argument list at the moment"); resultTypeConversions.emplace_back( - [&](Type origin, Type input) -> Optional { + [=](Type origin, Type input) -> Optional { if (origin.template isa() && input.template isa()) return kind; return llvm::None; diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir new file mode 100644 index 0000000000000..8d965779dfc6d --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -0,0 +1,81 @@ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#map0 = affine_map<(d0, d1) -> (d1, d0)> + +func @print_memref_f32(memref<*xf32>) + +func @alloc_2d_filled_f32(%arg0: index, %arg1: index) -> memref { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c10 = constant 10 : index + %c100 = constant 100 : index + %0 = alloc(%arg0, %arg1) : memref + scf.for %arg5 = %c0 to %arg0 step %c1 { + scf.for %arg6 = %c0 to %arg1 step %c1 { + %arg66 = muli %arg6, %c100 : index + %tmp1 = addi %arg5, %arg66 : index + %tmp2 = index_cast %tmp1 : index to i32 + %tmp3 = sitofp %tmp2 : i32 to f32 + store %tmp3, %0[%arg5, %arg6] : memref + } + } + return %0 : memref +} + +func @main() { + %c0 = constant 0 : index + %c1 = 
constant 1 : index + %c2 = constant 2 : index + %c6 = constant 6 : index + %cst = constant -4.2e+01 : f32 + %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref + %converted = memref_cast %0 : memref to memref<*xf32> + call @print_memref_f32(%converted): (memref<*xf32>) -> () + // CHECK: Unranked{{.*}}data = + // CHECK: [ + // CHECK-SAME: [0, 100, 200, 300, 400, 500], + // CHECK-NEXT: [1, 101, 201, 301, 401, 501], + // CHECK-NEXT: [2, 102, 202, 302, 402, 502], + // CHECK-NEXT: [3, 103, 203, 303, 403, 503], + // CHECK-NEXT: [4, 104, 204, 304, 404, 504], + // CHECK-NEXT: [5, 105, 205, 305, 405, 505]] + + %init = vector.transfer_read %0[%c1, %c1], %cst : memref, vector<5x5xf32> + vector.print %init : vector<5x5xf32> + // 5x5 block rooted at {1, 1} + // CHECK-NEXT: ( ( 101, 201, 301, 401, 501 ), + // CHECK-SAME: ( 102, 202, 302, 402, 502 ), + // CHECK-SAME: ( 103, 203, 303, 403, 503 ), + // CHECK-SAME: ( 104, 204, 304, 404, 504 ), + // CHECK-SAME: ( 105, 205, 305, 405, 505 ) ) + + %1 = vector.transfer_read %0[%c1, %c1], %cst {permutation_map = #map0} : memref, vector<5x5xf32> + vector.print %1 : vector<5x5xf32> + // Transposed 5x5 block rooted @{1, 1} in memory. + // CHECK-NEXT: ( ( 101, 102, 103, 104, 105 ), + // CHECK-SAME: ( 201, 202, 203, 204, 205 ), + // CHECK-SAME: ( 301, 302, 303, 304, 305 ), + // CHECK-SAME: ( 401, 402, 403, 404, 405 ), + // CHECK-SAME: ( 501, 502, 503, 504, 505 ) ) + + // Transpose-write the transposed 5x5 block @{0, 0} in memory. + vector.transfer_write %1, %0[%c0, %c0] {permutation_map = #map0} : vector<5x5xf32>, memref + + %2 = vector.transfer_read %0[%c1, %c1], %cst : memref, vector<5x5xf32> + vector.print %2 : vector<5x5xf32> + // New 5x5 block rooted @{1, 1} in memory. + // Here we expect the boundaries from the original data + // (i.e. last row: 105 .. 505, last col: 501 .. 505) + // and the 4x4 subblock 202 .. 
505 rooted @{0, 0} in the vector + // CHECK-NEXT: ( ( 202, 302, 402, 502, 501 ), + // CHECK-SAME: ( 203, 303, 403, 503, 502 ), + // CHECK-SAME: ( 204, 304, 404, 504, 503 ), + // CHECK-SAME: ( 205, 305, 405, 505, 504 ), + // CHECK-SAME: ( 105, 205, 305, 405, 505 ) ) + + dealloc %0 : memref + return +} diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index 70c1a28e92bef..bf1235a77d08c 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -221,34 +221,37 @@ namespace { /// CRTP base classes for Python attributes that subclass Attribute and should /// be castable from it (i.e. via something like StringAttr(attr)). -template -class PyConcreteAttribute : public PyAttribute { +/// By default, attribute class hierarchies are one level deep (i.e. a +/// concrete attribute class extends PyAttribute); however, intermediate +/// python-visible base classes can be modeled by specifying a BaseTy. +template +class PyConcreteAttribute : public BaseTy { public: // Derived classes must define statics for: // IsAFunctionTy isaFunction // const char *pyClassName - using ClassTy = py::class_; + using ClassTy = py::class_; using IsAFunctionTy = int (*)(MlirAttribute); PyConcreteAttribute() = default; - PyConcreteAttribute(MlirAttribute attr) : PyAttribute(attr) {} + PyConcreteAttribute(MlirAttribute attr) : BaseTy(attr) {} PyConcreteAttribute(PyAttribute &orig) : PyConcreteAttribute(castFrom(orig)) {} static MlirAttribute castFrom(PyAttribute &orig) { - if (!T::isaFunction(orig.attr)) { + if (!DerivedTy::isaFunction(orig.attr)) { auto origRepr = py::repr(py::cast(orig)).cast(); throw SetPyError(PyExc_ValueError, llvm::Twine("Cannot cast attribute to ") + - T::pyClassName + " (from " + origRepr + ")"); + DerivedTy::pyClassName + " (from " + origRepr + ")"); } return orig.attr; } static void bind(py::module &m) { - auto cls = ClassTy(m, T::pyClassName); + auto cls = ClassTy(m, 
DerivedTy::pyClassName); cls.def(py::init(), py::keep_alive<0, 1>()); - T::bindDerived(cls); + DerivedTy::bindDerived(cls); } /// Implemented by derived classes to add methods to the Python subclass. @@ -301,33 +304,36 @@ namespace { /// CRTP base classes for Python types that subclass Type and should be /// castable from it (i.e. via something like IntegerType(t)). -template -class PyConcreteType : public PyType { +/// By default, type class hierarchies are one level deep (i.e. a +/// concrete type class extends PyType); however, intermediate python-visible +/// base classes can be modeled by specifying a BaseTy. +template +class PyConcreteType : public BaseTy { public: // Derived classes must define statics for: // IsAFunctionTy isaFunction // const char *pyClassName - using ClassTy = py::class_; + using ClassTy = py::class_; using IsAFunctionTy = int (*)(MlirType); PyConcreteType() = default; - PyConcreteType(MlirType t) : PyType(t) {} - PyConcreteType(PyType &orig) : PyType(castFrom(orig)) {} + PyConcreteType(MlirType t) : BaseTy(t) {} + PyConcreteType(PyType &orig) : PyConcreteType(castFrom(orig)) {} static MlirType castFrom(PyType &orig) { - if (!T::isaFunction(orig.type)) { + if (!DerivedTy::isaFunction(orig.type)) { auto origRepr = py::repr(py::cast(orig)).cast(); throw SetPyError(PyExc_ValueError, llvm::Twine("Cannot cast type to ") + - T::pyClassName + " (from " + - origRepr + ")"); + DerivedTy::pyClassName + + " (from " + origRepr + ")"); } return orig.type; } static void bind(py::module &m) { - auto cls = ClassTy(m, T::pyClassName); + auto cls = ClassTy(m, DerivedTy::pyClassName); cls.def(py::init(), py::keep_alive<0, 1>()); - T::bindDerived(cls); + DerivedTy::bindDerived(cls); } /// Implemented by derived classes to add methods to the Python subclass. 
@@ -516,8 +522,81 @@ class PyComplexType : public PyConcreteType { } }; +class PyShapedType : public PyConcreteType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAShaped; + static constexpr const char *pyClassName = "ShapedType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_property_readonly( + "element_type", + [](PyShapedType &self) { + MlirType t = mlirShapedTypeGetElementType(self.type); + return PyType(t); + }, + py::keep_alive<0, 1>(), "Returns the element type of the shaped type."); + c.def_property_readonly( + "has_rank", + [](PyShapedType &self) -> bool { + return mlirShapedTypeHasRank(self.type); + }, + "Returns whether the given shaped type is ranked."); + c.def_property_readonly( + "rank", + [](PyShapedType &self) { + self.requireHasRank(); + return mlirShapedTypeGetRank(self.type); + }, + "Returns the rank of the given ranked shaped type."); + c.def_property_readonly( + "has_static_shape", + [](PyShapedType &self) -> bool { + return mlirShapedTypeHasStaticShape(self.type); + }, + "Returns whether the given shaped type has a static shape."); + c.def( + "is_dynamic_dim", + [](PyShapedType &self, intptr_t dim) -> bool { + self.requireHasRank(); + return mlirShapedTypeIsDynamicDim(self.type, dim); + }, + "Returns whether the dim-th dimension of the given shaped type is " + "dynamic."); + c.def( + "get_dim_size", + [](PyShapedType &self, intptr_t dim) { + self.requireHasRank(); + return mlirShapedTypeGetDimSize(self.type, dim); + }, + "Returns the dim-th dimension of the given ranked shaped type."); + c.def_static( + "is_dynamic_size", + [](int64_t size) -> bool { return mlirShapedTypeIsDynamicSize(size); }, + "Returns whether the given dimension size indicates a dynamic " + "dimension."); + c.def( + "is_dynamic_stride_or_offset", + [](PyShapedType &self, int64_t val) -> bool { + self.requireHasRank(); + return mlirShapedTypeIsDynamicStrideOrOffset(val); + }, + "Returns whether the given 
value is used as a placeholder for dynamic " + "strides and offsets in shaped types."); + } + +private: + void requireHasRank() { + if (!mlirShapedTypeHasRank(type)) { + throw SetPyError( + PyExc_ValueError, + "calling this method requires that the type has a rank."); + } + } +}; + /// Vector Type subclass - VectorType. -class PyVectorType : public PyConcreteType { +class PyVectorType : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAVector; static constexpr const char *pyClassName = "VectorType"; @@ -526,23 +605,175 @@ class PyVectorType : public PyConcreteType { static void bindDerived(ClassTy &c) { c.def_static( "get_vector", - [](std::vector shape, PyType &elementType) { - // The element must be a floating point or integer scalar type. - if (mlirTypeIsAIntegerOrFloat(elementType.type)) { - MlirType t = - mlirVectorTypeGet(shape.size(), shape.data(), elementType.type); - return PyVectorType(t); + // TODO: Make the location optional and create a default location. + [](std::vector shape, PyType &elementType, PyLocation &loc) { + MlirType t = mlirVectorTypeGetChecked(shape.size(), shape.data(), + elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point or integer type."); } - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point or integer type."); + return PyVectorType(t); }, py::keep_alive<0, 2>(), "Create a vector type"); } }; +/// Ranked Tensor Type subclass - RankedTensorType. 
+class PyRankedTensorType + : public PyConcreteType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsARankedTensor; + static constexpr const char *pyClassName = "RankedTensorType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_ranked_tensor", + // TODO: Make the location optional and create a default location. + [](std::vector shape, PyType &elementType, PyLocation &loc) { + MlirType t = mlirRankedTensorTypeGetChecked( + shape.size(), shape.data(), elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyRankedTensorType(t); + }, + py::keep_alive<0, 2>(), "Create a ranked tensor type"); + } +}; + +/// Unranked Tensor Type subclass - UnrankedTensorType. +class PyUnrankedTensorType + : public PyConcreteType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAUnrankedTensor; + static constexpr const char *pyClassName = "UnrankedTensorType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_unranked_tensor", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, PyLocation &loc) { + MlirType t = + mlirUnrankedTensorTypeGetChecked(elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. 
+ if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyUnrankedTensorType(t); + }, + py::keep_alive<0, 1>(), "Create an unranked tensor type"); + } +}; + +/// Ranked MemRef Type subclass - MemRefType. +class PyMemRefType : public PyConcreteType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAMemRef; + static constexpr const char *pyClassName = "MemRefType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + // TODO: Add mlirMemRefTypeGet and mlirMemRefTypeGetAffineMap binding + // once the affine map binding is completed. + c.def_static( + "get_contiguous_memref", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, std::vector shape, + unsigned memorySpace, PyLocation &loc) { + MlirType t = mlirMemRefTypeContiguousGetChecked( + elementType.type, shape.size(), shape.data(), memorySpace, + loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyMemRefType(t); + }, + py::keep_alive<0, 1>(), "Create a memref type") + .def_property_readonly( + "num_affine_maps", + [](PyMemRefType &self) -> intptr_t { + return mlirMemRefTypeGetNumAffineMaps(self.type); + }, + "Returns the number of affine layout maps in the given MemRef " + "type.") + .def_property_readonly( + "memory_space", + [](PyMemRefType &self) -> unsigned { + return mlirMemRefTypeGetMemorySpace(self.type); + }, + "Returns the memory space of the given MemRef type."); + } +}; + +/// Unranked MemRef Type subclass - UnrankedMemRefType. 
+class PyUnrankedMemRefType + : public PyConcreteType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAUnrankedMemRef; + static constexpr const char *pyClassName = "UnrankedMemRefType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_unranked_memref", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, unsigned memorySpace, PyLocation &loc) { + MlirType t = mlirUnrankedMemRefTypeGetChecked(elementType.type, + memorySpace, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyUnrankedMemRefType(t); + }, + py::keep_alive<0, 1>(), "Create an unranked memref type") + .def_property_readonly( + "memory_space", + [](PyUnrankedMemRefType &self) -> unsigned { + return mlirUnrankedMemrefGetMemorySpace(self.type); + }, + "Returns the memory space of the given Unranked MemRef type."); + } +}; + +/// Tuple Type subclass - TupleType. 
class PyTupleType : public PyConcreteType { public: @@ -886,6 +1117,11 @@ void mlir::python::populateIRSubmodule(py::module &m) { PyF64Type::bind(m); PyNoneType::bind(m); PyComplexType::bind(m); + PyShapedType::bind(m); PyVectorType::bind(m); + PyRankedTensorType::bind(m); + PyUnrankedTensorType::bind(m); + PyMemRefType::bind(m); + PyUnrankedMemRefType::bind(m); PyTupleType::bind(m); } diff --git a/mlir/lib/CAPI/IR/StandardTypes.cpp b/mlir/lib/CAPI/IR/StandardTypes.cpp index eb006242e8808..ddd3a5e93147a 100644 --- a/mlir/lib/CAPI/IR/StandardTypes.cpp +++ b/mlir/lib/CAPI/IR/StandardTypes.cpp @@ -168,6 +168,13 @@ MlirType mlirVectorTypeGet(intptr_t rank, int64_t *shape, unwrap(elementType))); } +MlirType mlirVectorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, MlirLocation loc) { + return wrap(VectorType::getChecked( + llvm::makeArrayRef(shape, static_cast(rank)), unwrap(elementType), + unwrap(loc))); +} + /* ========================================================================== */ /* Ranked / Unranked tensor type. */ /* ========================================================================== */ @@ -189,10 +196,23 @@ MlirType mlirRankedTensorTypeGet(intptr_t rank, int64_t *shape, unwrap(elementType))); } +MlirType mlirRankedTensorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, + MlirLocation loc) { + return wrap(RankedTensorType::getChecked( + llvm::makeArrayRef(shape, static_cast(rank)), unwrap(elementType), + unwrap(loc))); +} + MlirType mlirUnrankedTensorTypeGet(MlirType elementType) { return wrap(UnrankedTensorType::get(unwrap(elementType))); } +MlirType mlirUnrankedTensorTypeGetChecked(MlirType elementType, + MlirLocation loc) { + return wrap(UnrankedTensorType::getChecked(unwrap(elementType), unwrap(loc))); +} + /* ========================================================================== */ /* Ranked / Unranked MemRef type. 
*/ /* ========================================================================== */ @@ -216,6 +236,15 @@ MlirType mlirMemRefTypeContiguousGet(MlirType elementType, intptr_t rank, unwrap(elementType), llvm::None, memorySpace)); } +MlirType mlirMemRefTypeContiguousGetChecked(MlirType elementType, intptr_t rank, + int64_t *shape, + unsigned memorySpace, + MlirLocation loc) { + return wrap(MemRefType::getChecked( + llvm::makeArrayRef(shape, static_cast(rank)), unwrap(elementType), + llvm::None, memorySpace, unwrap(loc))); +} + intptr_t mlirMemRefTypeGetNumAffineMaps(MlirType type) { return static_cast( unwrap(type).cast().getAffineMaps().size()); @@ -237,6 +266,13 @@ MlirType mlirUnrankedMemRefTypeGet(MlirType elementType, unsigned memorySpace) { return wrap(UnrankedMemRefType::get(unwrap(elementType), memorySpace)); } +MlirType mlirUnrankedMemRefTypeGetChecked(MlirType elementType, + unsigned memorySpace, + MlirLocation loc) { + return wrap(UnrankedMemRefType::getChecked(unwrap(elementType), memorySpace, + unwrap(loc))); +} + unsigned mlirUnrankedMemrefGetMemorySpace(MlirType type) { return unwrap(type).cast().getMemorySpace(); } diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index c2bb2130569d3..fe2af07b2a6a8 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -12,7 +12,6 @@ add_subdirectory(OpenMPToLLVM) add_subdirectory(SCFToGPU) add_subdirectory(SCFToSPIRV) add_subdirectory(SCFToStandard) -add_subdirectory(ShapeToSCF) add_subdirectory(ShapeToStandard) add_subdirectory(SPIRVToLLVM) add_subdirectory(StandardToLLVM) diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp index 0460d98b44a47..f38eabb9465d5 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp +++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp @@ -339,7 +339,8 @@ class TransposeOpConversion : public ConvertToLLVMPattern { class YieldOpConversion : public 
ConvertToLLVMPattern { public: explicit YieldOpConversion(MLIRContext *context, LLVMTypeConverter &lowering_) - : ConvertToLLVMPattern(YieldOp::getOperationName(), context, lowering_) {} + : ConvertToLLVMPattern(linalg::YieldOp::getOperationName(), context, + lowering_) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, diff --git a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp index 34ee48758e9e6..14f365f95ee5a 100644 --- a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp +++ b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp @@ -356,7 +356,7 @@ ParallelLowering::matchAndRewrite(ParallelOp parallelOp, // A loop is constructed with an empty "yield" terminator if there are // no results. rewriter.setInsertionPointToEnd(rewriter.getInsertionBlock()); - rewriter.create(loc, forOp.getResults()); + rewriter.create(loc, forOp.getResults()); } rewriter.setInsertionPointToStart(forOp.getBody()); @@ -391,7 +391,7 @@ ParallelLowering::matchAndRewrite(ParallelOp parallelOp, if (!yieldOperands.empty()) { rewriter.setInsertionPointToEnd(rewriter.getInsertionBlock()); - rewriter.create(loc, yieldOperands); + rewriter.create(loc, yieldOperands); } rewriter.replaceOp(parallelOp, loopResults); diff --git a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt b/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt deleted file mode 100644 index 60dd2b8514da4..0000000000000 --- a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -add_mlir_conversion_library(MLIRShapeToSCF - ShapeToSCF.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ShapeToSCF - - DEPENDS - MLIRConversionPassIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRIR - MLIRShape - MLIRPass - MLIRSCF - MLIRTransforms - ) diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp deleted file mode 100644 index ae326c5c513e6..0000000000000 --- 
a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp +++ /dev/null @@ -1,337 +0,0 @@ -//===- ShapeToSCF.cpp - conversion from Shape to SCF dialect --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" - -#include "../PassDetail.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/Transforms/DialectConversion.h" - -using namespace mlir; -using namespace mlir::shape; -using namespace mlir::scf; - -namespace { -struct BroadcastOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(BroadcastOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult BroadcastOpConverter::matchAndRewrite( - BroadcastOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. - if (op.getType().isa()) - return failure(); - - assert(!op.lhs().getType().isa() && - !op.rhs().getType().isa()); - auto loc = op.getLoc(); - BroadcastOp::Adaptor transformed(operands); - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - - // Find smaller and greater rank and extent tensor. 
- Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); - Value lhsSmaller = - rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); - Type indexTy = rewriter.getIndexType(); - Type extentTensorTy = op.getType(); - auto ifOp = rewriter.create( - loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, - lhsSmaller, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{lhsRank, transformed.lhs(), - rhsRank, transformed.rhs()}); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{rhsRank, transformed.rhs(), - lhsRank, transformed.lhs()}); - }); - Value smallerRank = ifOp.getResult(0); - Value smallerOperand = ifOp.getResult(1); - Value greaterRank = ifOp.getResult(2); - Value greaterOperand = ifOp.getResult(3); - - // Allocate stack memory for the broadcasted extent tensor. - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); - - // Copy extents from greater operand that are not challenged. - Value rankDiff = - rewriter.create(loc, indexTy, greaterRank, smallerRank); - rewriter.create(loc, zero, rankDiff, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value extent = b.create( - loc, greaterOperand, ValueRange{iv}); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Determine remaining broadcasted extents. 
- rewriter.create( - loc, rankDiff, greaterRank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value greaterOperandExtent = - b.create(loc, greaterOperand, ValueRange{iv}); - Value greaterOperandExtentIsOne = - b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); - auto ifOp = b.create( - loc, TypeRange{indexTy}, greaterOperandExtentIsOne, - [&](OpBuilder &b, Location loc) { - Value ivShifted = b.create(loc, indexTy, iv, rankDiff); - Value smallerOperandExtent = b.create( - loc, smallerOperand, ValueRange{ivShifted}); - b.create(loc, smallerOperandExtent); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, greaterOperandExtent); - }); - Value extent = ifOp.getResult(0); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Load broadcasted shape as an extent tensor. - rewriter.replaceOpWithNewOp(op, mem); - return success(); -} - -namespace { -/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is -/// only defined on `tensor` operands. The test for equality first -/// compares their size and, if equal, checks every extent for equality. 
-/// -/// Example: -/// -/// %result = shape.shape_eq %a, %b : tensor, tensor -/// -/// becomes -/// -/// %c0 = constant 0 : index -/// %0 = dim %arg0, %c0 : tensor -/// %1 = dim %arg1, %c0 : tensor -/// %2 = cmpi "eq", %0, %1 : index -/// %result = scf.if %2 -> (i1) { -/// %c1 = constant 1 : index -/// %true = constant true -/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { -/// %5 = extract_element %arg0[%arg2] : tensor -/// %6 = extract_element %arg1[%arg2] : tensor -/// %7 = cmpi "eq", %5, %6 : index -/// %8 = and %arg3, %7 : i1 -/// scf.yield %8 : i1 -/// } -/// scf.yield %4 : i1 -/// } else { -/// %false = constant false -/// scf.yield %false : i1 -/// } -/// -struct ShapeEqOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. 
- if (op.lhs().getType().isa() || - op.rhs().getType().isa()) { - return failure(); - } - - ShapeEqOp::Adaptor transformed(operands); - auto loc = op.getLoc(); - Type indexTy = rewriter.getIndexType(); - Value zero = rewriter.create(loc, 0); - Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); - Value eqRank = - rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); - Type i1Ty = rewriter.getI1Type(); - rewriter.replaceOpWithNewOp( - op, i1Ty, eqRank, - [&](OpBuilder &b, Location loc) { - Value one = b.create(loc, 1); - Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); - auto loop = b.create( - loc, zero, lhsRank, one, ValueRange{init}, - [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { - Value conj = args[0]; - Value lhsExtent = - b.create(loc, transformed.lhs(), iv); - Value rhsExtent = - b.create(loc, transformed.rhs(), iv); - Value eqExtent = b.create(loc, CmpIPredicate::eq, - lhsExtent, rhsExtent); - Value conjNext = b.create(loc, conj, eqExtent); - b.create(loc, ValueRange({conjNext})); - }); - b.create(loc, loop.getResults()); - }, - [&](OpBuilder &b, Location loc) { - Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); - b.create(loc, result); - }); - return success(); -} - -namespace { -/// Converts `shape.reduce` to `scf.for`. -struct ReduceOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final; -}; -} // namespace - -LogicalResult -ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands. 
- if (op.shape().getType().isa()) - return failure(); - - auto loc = op.getLoc(); - shape::ReduceOp::Adaptor transformed(operands); - - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - Type indexTy = rewriter.getIndexType(); - Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); - - auto loop = rewriter.create( - loc, zero, rank, one, op.initVals(), - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value extent = b.create(loc, transformed.shape(), iv); - - SmallVector mappedValues{iv, extent}; - mappedValues.append(args.begin(), args.end()); - - BlockAndValueMapping mapping; - Block *reduceBody = op.getBody(); - mapping.map(reduceBody->getArguments(), mappedValues); - for (auto &nested : reduceBody->without_terminator()) - b.clone(nested, mapping); - - SmallVector mappedResults; - for (auto result : reduceBody->getTerminator()->getOperands()) - mappedResults.push_back(mapping.lookup(result)); - b.create(loc, mappedResults); - }); - - rewriter.replaceOp(op, loop.getResults()); - return success(); -} - -namespace { -/// Converts `shape_of` to for loop for unranked tensors. -class ShapeOfOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeOfOpConverter::matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering supports only error-free arguments. - if (op.getType().isa()) - return failure(); - - // For ranked tensors `shape_of` lowers to `std` and the pattern can be - // found in the corresponding pass. - ShapeOfOp::Adaptor transformed(operands); - Value arg = transformed.arg(); - Type argTy = arg.getType(); - if (argTy.isa()) - return failure(); - - // Allocate stack memory. 
- auto loc = op.getLoc(); - Value rank = rewriter.create(loc, arg); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, arg, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); - }); - - // Load extents to tensor value. - rewriter.replaceOpWithNewOp(op.getOperation(), mem); - return success(); -} - -namespace { -struct ConvertShapeToSCFPass - : public ConvertShapeToSCFBase { - void runOnFunction() override; -}; -} // namespace - -void ConvertShapeToSCFPass::runOnFunction() { - MLIRContext &ctx = getContext(); - - // Populate conversion patterns. - OwningRewritePatternList patterns; - populateShapeToSCFConversionPatterns(patterns, &ctx); - - // Setup target legality. - ConversionTarget target(getContext()); - target.addLegalDialect(); - - // Apply conversion. 
- if (failed(applyPartialConversion(getFunction(), target, patterns))) - signalPassFailure(); -} - -void mlir::populateShapeToSCFConversionPatterns( - OwningRewritePatternList &patterns, MLIRContext *ctx) { - // clang-format off - patterns.insert< - BroadcastOpConverter, - ShapeEqOpConverter, - ReduceOpConverter, - ShapeOfOpConverter>(ctx); - // clang-format on -} - -std::unique_ptr mlir::createConvertShapeToSCFPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index e92bb83d4f424..8c917e08f942c 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -12,10 +12,12 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/Transforms/DialectConversion.h" using namespace mlir; using namespace mlir::shape; +using namespace mlir::scf; /// Conversion patterns. 
namespace { @@ -63,67 +65,94 @@ class BinaryOpConversion : public OpConversionPattern { } // namespace namespace { -class ConstSizeOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ConstSizeOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); - return success(); - } -}; -} // namespace - -namespace { -class ShapeOfOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; +struct BroadcastOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, + matchAndRewrite(BroadcastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; } // namespace -LogicalResult ShapeOfOpConversion::matchAndRewrite( - ShapeOfOp op, ArrayRef operands, +LogicalResult BroadcastOpConverter::matchAndRewrite( + BroadcastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { - - // For now, only error-free types are supported by this lowering. + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. if (op.getType().isa()) return failure(); - // For unranked tensors `shape_of` lowers to `scf` and the pattern can be - // found in the corresponding pass. - ShapeOfOp::Adaptor transformed(operands); - Value tensorVal = transformed.arg(); - Type tensorTy = tensorVal.getType(); - if (tensorTy.isa()) - return failure(); - - // Build values for individual dimensions. 
- SmallVector dimValues; - RankedTensorType rankedTensorTy = tensorTy.cast(); - int64_t rank = rankedTensorTy.getRank(); + assert(!op.lhs().getType().isa() && + !op.rhs().getType().isa()); auto loc = op.getLoc(); - for (int64_t i = 0; i < rank; i++) { - if (rankedTensorTy.isDynamicDim(i)) { - Value dimVal = rewriter.create(loc, tensorVal, i); - dimValues.push_back(dimVal); - } else { - int64_t dim = rankedTensorTy.getDimSize(i); - Value dimVal = rewriter.create(loc, dim); - dimValues.push_back(dimVal); - } - } - - // Materialize extent tensor. - Value staticExtentTensor = - rewriter.create(loc, dimValues); - rewriter.replaceOpWithNewOp(op, staticExtentTensor, - op.getType()); + BroadcastOp::Adaptor transformed(operands); + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + + // Find smaller and greater rank and extent tensor. + Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); + Value lhsSmaller = + rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); + Type indexTy = rewriter.getIndexType(); + Type extentTensorTy = op.getType(); + auto ifOp = rewriter.create( + loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, + lhsSmaller, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{lhsRank, transformed.lhs(), + rhsRank, transformed.rhs()}); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{rhsRank, transformed.rhs(), + lhsRank, transformed.lhs()}); + }); + Value smallerRank = ifOp.getResult(0); + Value smallerOperand = ifOp.getResult(1); + Value greaterRank = ifOp.getResult(2); + Value greaterOperand = ifOp.getResult(3); + + // Allocate stack memory for the broadcasted extent tensor. + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); + + // Copy extents from greater operand that are not challenged. 
+ Value rankDiff = + rewriter.create(loc, indexTy, greaterRank, smallerRank); + rewriter.create(loc, zero, rankDiff, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value extent = b.create( + loc, greaterOperand, ValueRange{iv}); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Determine remaining broadcasted extents. + rewriter.create( + loc, rankDiff, greaterRank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value greaterOperandExtent = + b.create(loc, greaterOperand, ValueRange{iv}); + Value greaterOperandExtentIsOne = + b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); + auto ifOp = b.create( + loc, TypeRange{indexTy}, greaterOperandExtentIsOne, + [&](OpBuilder &b, Location loc) { + Value ivShifted = b.create(loc, indexTy, iv, rankDiff); + Value smallerOperandExtent = b.create( + loc, smallerOperand, ValueRange{ivShifted}); + b.create(loc, smallerOperandExtent); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, greaterOperandExtent); + }); + Value extent = ifOp.getResult(0); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Load broadcasted shape as an extent tensor. 
+ rewriter.replaceOpWithNewOp(op, mem); return success(); } @@ -161,26 +190,23 @@ LogicalResult ConstShapeOpConverter::matchAndRewrite( } namespace { -class ToExtentTensorOpConversion - : public OpConversionPattern { +class ConstSizeOpConversion : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - ToExtentTensorOpAdaptor adaptor(operands); - - if (!adaptor.input().getType().isa()) - return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); - - rewriter.replaceOpWithNewOp(op, adaptor.input(), - op.getType()); - return success(); - } + matchAndRewrite(ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; }; } // namespace +LogicalResult ConstSizeOpConversion::matchAndRewrite( + ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); + return success(); +} + namespace { class GetExtentOpConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -239,6 +265,236 @@ RankOpConverter::matchAndRewrite(shape::RankOp op, ArrayRef operands, return success(); } +namespace { +/// Converts `shape.reduce` to `scf.for`. +struct ReduceOpConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final; +}; +} // namespace + +LogicalResult +ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands. 
+ if (op.shape().getType().isa()) + return failure(); + + auto loc = op.getLoc(); + shape::ReduceOp::Adaptor transformed(operands); + + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + Type indexTy = rewriter.getIndexType(); + Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); + + auto loop = rewriter.create( + loc, zero, rank, one, op.initVals(), + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value extent = b.create(loc, transformed.shape(), iv); + + SmallVector mappedValues{iv, extent}; + mappedValues.append(args.begin(), args.end()); + + BlockAndValueMapping mapping; + Block *reduceBody = op.getBody(); + mapping.map(reduceBody->getArguments(), mappedValues); + for (auto &nested : reduceBody->without_terminator()) + b.clone(nested, mapping); + + SmallVector mappedResults; + for (auto result : reduceBody->getTerminator()->getOperands()) + mappedResults.push_back(mapping.lookup(result)); + b.create(loc, mappedResults); + }); + + rewriter.replaceOp(op, loop.getResults()); + return success(); +} + +namespace { +/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is +/// only defined on `tensor` operands. The test for equality first +/// compares their size and, if equal, checks every extent for equality. 
+/// +/// Example: +/// +/// %result = shape.shape_eq %a, %b : tensor, tensor +/// +/// becomes +/// +/// %c0 = constant 0 : index +/// %0 = dim %arg0, %c0 : tensor +/// %1 = dim %arg1, %c0 : tensor +/// %2 = cmpi "eq", %0, %1 : index +/// %result = scf.if %2 -> (i1) { +/// %c1 = constant 1 : index +/// %true = constant true +/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { +/// %5 = extract_element %arg0[%arg2] : tensor +/// %6 = extract_element %arg1[%arg2] : tensor +/// %7 = cmpi "eq", %5, %6 : index +/// %8 = and %arg3, %7 : i1 +/// scf.yield %8 : i1 +/// } +/// scf.yield %4 : i1 +/// } else { +/// %false = constant false +/// scf.yield %false : i1 +/// } +/// +struct ShapeEqOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult +ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. 
+ if (op.lhs().getType().isa() || + op.rhs().getType().isa()) { + return failure(); + } + + ShapeEqOp::Adaptor transformed(operands); + auto loc = op.getLoc(); + Type indexTy = rewriter.getIndexType(); + Value zero = rewriter.create(loc, 0); + Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); + Value eqRank = + rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); + Type i1Ty = rewriter.getI1Type(); + rewriter.replaceOpWithNewOp( + op, i1Ty, eqRank, + [&](OpBuilder &b, Location loc) { + Value one = b.create(loc, 1); + Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); + auto loop = b.create( + loc, zero, lhsRank, one, ValueRange{init}, + [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { + Value conj = args[0]; + Value lhsExtent = + b.create(loc, transformed.lhs(), iv); + Value rhsExtent = + b.create(loc, transformed.rhs(), iv); + Value eqExtent = b.create(loc, CmpIPredicate::eq, + lhsExtent, rhsExtent); + Value conjNext = b.create(loc, conj, eqExtent); + b.create(loc, ValueRange({conjNext})); + }); + b.create(loc, loop.getResults()); + }, + [&](OpBuilder &b, Location loc) { + Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); + b.create(loc, result); + }); + return success(); +} + +namespace { +class ShapeOfOpConversion : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult ShapeOfOpConversion::matchAndRewrite( + ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + + // For now, only error-free types are supported by this lowering. + if (op.getType().isa()) + return failure(); + + // For ranked tensor arguments, lower to `tensor_from_elements`. 
+ ShapeOfOp::Adaptor transformed(operands); + Value tensor = transformed.arg(); + Type tensorTy = tensor.getType(); + if (tensorTy.isa()) { + + // Build values for individual extents. + SmallVector extentValues; + RankedTensorType rankedTensorTy = tensorTy.cast(); + int64_t rank = rankedTensorTy.getRank(); + auto loc = op.getLoc(); + for (int64_t i = 0; i < rank; i++) { + if (rankedTensorTy.isDynamicDim(i)) { + Value extent = rewriter.create(loc, tensor, i); + extentValues.push_back(extent); + } else { + Value extent = + rewriter.create(loc, rankedTensorTy.getDimSize(i)); + extentValues.push_back(extent); + } + } + + // Materialize extent tensor. + Value staticExtentTensor = + rewriter.create(loc, extentValues); + rewriter.replaceOpWithNewOp(op, staticExtentTensor, + op.getType()); + return success(); + } + + // Allocate stack memory. + auto loc = op.getLoc(); + Value rank = rewriter.create(loc, tensor); + Type indexTy = rewriter.getIndexType(); + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{rank}); + + // Copy shape extents to stack-allocated memory. + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + rewriter.create( + loc, zero, rank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value dim = rewriter.create(loc, tensor, iv); + rewriter.create(loc, dim, mem, ValueRange{iv}); + rewriter.create(loc); + }); + + // Load extents to tensor value. 
+ rewriter.replaceOpWithNewOp(op.getOperation(), mem); + return success(); +} + +namespace { +class ToExtentTensorOpConversion + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + ToExtentTensorOpAdaptor adaptor(operands); + + if (!adaptor.input().getType().isa()) + return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); + + rewriter.replaceOpWithNewOp(op, adaptor.input(), + op.getType()); + return success(); + } +}; +} // namespace + namespace { /// Conversion pass. class ConvertShapeToStandardPass @@ -252,7 +508,7 @@ void ConvertShapeToStandardPass::runOnOperation() { // Setup target legality. MLIRContext &ctx = getContext(); ConversionTarget target(ctx); - target.addLegalDialect(); + target.addLegalDialect(); target.addLegalOp(); // Setup conversion patterns. @@ -271,11 +527,14 @@ void mlir::populateShapeToStandardConversionPatterns( patterns.insert< AnyOpConversion, BinaryOpConversion, - ConstShapeOpConverter, BinaryOpConversion, + BroadcastOpConverter, + ConstShapeOpConverter, ConstSizeOpConversion, GetExtentOpConverter, RankOpConverter, + ReduceOpConverter, + ShapeEqOpConverter, ShapeOfOpConversion, ToExtentTensorOpConversion>(ctx); // clang-format on diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 401509f1f8a60..55a926ef1423d 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1893,11 +1893,17 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern { // Adjust the allocation size to consider alignment. 
if (Optional alignment = allocOp.alignment()) { accessAlignment = createIndexConstant(rewriter, loc, *alignment); - cumulativeSize = rewriter.create( - loc, - rewriter.create(loc, cumulativeSize, accessAlignment), - one); + } else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) { + // In the case where no alignment is specified, we may want to override + // `malloc's` behavior. `malloc` typically aligns at the size of the + // biggest scalar on a target HW. For non-scalars, use the natural + // alignment of the LLVM type given by the LLVM DataLayout. + accessAlignment = + this->getSizeInBytes(loc, memRefType.getElementType(), rewriter); } + if (accessAlignment) + cumulativeSize = + rewriter.create(loc, cumulativeSize, accessAlignment); callArgs.push_back(cumulativeSize); } auto allocFuncSymbol = rewriter.getSymbolRefAttr(allocFunc); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index ecb047a1ad143..a43bec855ff0a 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -117,6 +117,45 @@ static SmallVector getI64SubArray(ArrayAttr arrayAttr, return res; } +// Helper that returns a vector comparison that constructs a mask: +// mask = [0,1,..,n-1] + [o,o,..,o] < [b,b,..,b] +// +// NOTE: The LLVM::GetActiveLaneMaskOp intrinsic would provide an alternative, +// much more compact, IR for this operation, but LLVM eventually +// generates more elaborate instructions for this intrinsic since it +// is very conservative on the boundary conditions. +static Value buildVectorComparison(ConversionPatternRewriter &rewriter, + Operation *op, bool enableIndexOptimizations, + int64_t dim, Value b, Value *off = nullptr) { + auto loc = op->getLoc(); + // If we can assume all indices fit in 32-bit, we perform the vector + // comparison in 32-bit to get a higher degree of SIMD parallelism. 
+ // Otherwise we perform the vector comparison using 64-bit indices. + Value indices; + Type idxType; + if (enableIndexOptimizations) { + indices = rewriter.create( + loc, rewriter.getI32VectorAttr( + llvm::to_vector<4>(llvm::seq(0, dim)))); + idxType = rewriter.getI32Type(); + } else { + indices = rewriter.create( + loc, rewriter.getI64VectorAttr( + llvm::to_vector<4>(llvm::seq(0, dim)))); + idxType = rewriter.getI64Type(); + } + // Add in an offset if requested. + if (off) { + Value o = rewriter.create(loc, idxType, *off); + Value ov = rewriter.create(loc, indices.getType(), o); + indices = rewriter.create(loc, ov, indices); + } + // Construct the vector comparison. + Value bound = rewriter.create(loc, idxType, b); + Value bounds = rewriter.create(loc, indices.getType(), bound); + return rewriter.create(loc, CmpIPredicate::slt, indices, bounds); +} + // Helper that returns data layout alignment of an operation with memref. template LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, T op, @@ -408,11 +447,9 @@ class VectorGatherOpConversion : public ConvertToLLVMPattern { return failure(); // Replace with the gather intrinsic. - ValueRange v = (llvm::size(adaptor.pass_thru()) == 0) ? 
ValueRange({}) - : adaptor.pass_thru(); rewriter.replaceOpWithNewOp( - gather, typeConverter.convertType(vType), ptrs, adaptor.mask(), v, - rewriter.getI32IntegerAttr(align)); + gather, typeConverter.convertType(vType), ptrs, adaptor.mask(), + adaptor.pass_thru(), rewriter.getI32IntegerAttr(align)); return success(); } }; @@ -512,10 +549,10 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern { public: explicit VectorReductionOpConversion(MLIRContext *context, LLVMTypeConverter &typeConverter, - bool reassociateFP) + bool reassociateFPRed) : ConvertToLLVMPattern(vector::ReductionOp::getOperationName(), context, typeConverter), - reassociateFPReductions(reassociateFP) {} + reassociateFPReductions(reassociateFPRed) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, @@ -589,6 +626,34 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern { const bool reassociateFPReductions; }; +/// Conversion pattern for a vector.create_mask (1-D only). +class VectorCreateMaskOpConversion : public ConvertToLLVMPattern { +public: + explicit VectorCreateMaskOpConversion(MLIRContext *context, + LLVMTypeConverter &typeConverter, + bool enableIndexOpt) + : ConvertToLLVMPattern(vector::CreateMaskOp::getOperationName(), context, + typeConverter), + enableIndexOptimizations(enableIndexOpt) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto dstType = op->getResult(0).getType().cast(); + int64_t rank = dstType.getRank(); + if (rank == 1) { + rewriter.replaceOp( + op, buildVectorComparison(rewriter, op, enableIndexOptimizations, + dstType.getDimSize(0), operands[0])); + return success(); + } + return failure(); + } + +private: + const bool enableIndexOptimizations; +}; + class VectorShuffleOpConversion : public ConvertToLLVMPattern { public: explicit VectorShuffleOpConversion(MLIRContext *context, @@ -1121,17 +1186,19 @@ class VectorTypeCastOpConversion : public 
ConvertToLLVMPattern { /// Conversion pattern that converts a 1-D vector transfer read/write op in a /// sequence of: -/// 1. Bitcast or addrspacecast to vector form. -/// 2. Create an offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -/// 3. Create a mask where offsetVector is compared against memref upper bound. -/// 4. Rewrite op as a masked read or write. +/// 1. Get the source/dst address as an LLVM vector pointer. +/// 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. +/// 3. Create an offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. +/// 4. Create a mask where offsetVector is compared against memref upper bound. +/// 5. Rewrite op as a masked read or write. template class VectorTransferConversion : public ConvertToLLVMPattern { public: explicit VectorTransferConversion(MLIRContext *context, - LLVMTypeConverter &typeConv) - : ConvertToLLVMPattern(ConcreteOp::getOperationName(), context, - typeConv) {} + LLVMTypeConverter &typeConv, + bool enableIndexOpt) + : ConvertToLLVMPattern(ConcreteOp::getOperationName(), context, typeConv), + enableIndexOptimizations(enableIndexOpt) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, @@ -1155,7 +1222,6 @@ class VectorTransferConversion : public ConvertToLLVMPattern { auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); }; Location loc = op->getLoc(); - Type i64Type = rewriter.getIntegerType(64); MemRefType memRefType = xferOp.getMemRefType(); if (auto memrefVectorElementType = @@ -1202,41 +1268,26 @@ class VectorTransferConversion : public ConvertToLLVMPattern { xferOp, operands, vectorDataPtr); // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. 
- unsigned vecWidth = vecTy.getVectorNumElements(); - VectorType vectorCmpType = VectorType::get(vecWidth, i64Type); - SmallVector indices; - indices.reserve(vecWidth); - for (unsigned i = 0; i < vecWidth; ++i) - indices.push_back(i); - Value linearIndices = rewriter.create( - loc, vectorCmpType, - DenseElementsAttr::get(vectorCmpType, ArrayRef(indices))); - linearIndices = rewriter.create( - loc, toLLVMTy(vectorCmpType), linearIndices); - // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. - // TODO: when the leaf transfer rank is k > 1 we need the last - // `k` dimensions here. - unsigned lastIndex = llvm::size(xferOp.indices()) - 1; - Value offsetIndex = *(xferOp.indices().begin() + lastIndex); - offsetIndex = rewriter.create(loc, i64Type, offsetIndex); - Value base = rewriter.create(loc, vectorCmpType, offsetIndex); - Value offsetVector = rewriter.create(loc, base, linearIndices); - // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] + // + // TODO: when the leaf transfer rank is k > 1, we need the last `k` + // dimensions here. + unsigned vecWidth = vecTy.getVectorNumElements(); + unsigned lastIndex = llvm::size(xferOp.indices()) - 1; + Value off = xferOp.indices()[lastIndex]; Value dim = rewriter.create(loc, xferOp.memref(), lastIndex); - dim = rewriter.create(loc, i64Type, dim); - dim = rewriter.create(loc, vectorCmpType, dim); - Value mask = - rewriter.create(loc, CmpIPredicate::slt, offsetVector, dim); - mask = rewriter.create(loc, toLLVMTy(mask.getType()), - mask); + Value mask = buildVectorComparison(rewriter, op, enableIndexOptimizations, + vecWidth, dim, &off); // 5. Rewrite as a masked read / write. 
return replaceTransferOpWithMasked(rewriter, typeConverter, loc, xferOp, operands, vectorDataPtr, mask); } + +private: + const bool enableIndexOptimizations; }; class VectorPrintOpConversion : public ConvertToLLVMPattern { @@ -1444,7 +1495,7 @@ class VectorExtractStridedSliceOpConversion /// Populate the given list with patterns that convert from Vector to LLVM. void mlir::populateVectorToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, - bool reassociateFPReductions) { + bool reassociateFPReductions, bool enableIndexOptimizations) { MLIRContext *ctx = converter.getDialect()->getContext(); // clang-format off patterns.insert(ctx); patterns.insert( ctx, converter, reassociateFPReductions); + patterns.insert, + VectorTransferConversion>( + ctx, converter, enableIndexOptimizations); patterns .insert, - VectorTransferConversion, VectorTypeCastOpConversion, VectorMaskedLoadOpConversion, VectorMaskedStoreOpConversion, @@ -1485,6 +1538,7 @@ struct LowerVectorToLLVMPass : public ConvertVectorToLLVMBase { LowerVectorToLLVMPass(const LowerVectorToLLVMOptions &options) { this->reassociateFPReductions = options.reassociateFPReductions; + this->enableIndexOptimizations = options.enableIndexOptimizations; } void runOnOperation() override; }; @@ -1505,15 +1559,14 @@ void LowerVectorToLLVMPass::runOnOperation() { LLVMTypeConverter converter(&getContext()); OwningRewritePatternList patterns; populateVectorToLLVMMatrixConversionPatterns(converter, patterns); - populateVectorToLLVMConversionPatterns(converter, patterns, - reassociateFPReductions); + populateVectorToLLVMConversionPatterns( + converter, patterns, reassociateFPReductions, enableIndexOptimizations); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); populateStdToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); - if (failed(applyPartialConversion(getOperation(), target, patterns))) { + if 
(failed(applyPartialConversion(getOperation(), target, patterns))) signalPassFailure(); - } } std::unique_ptr> diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 0f428f887d124..8f7d43829846b 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -232,8 +232,7 @@ static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType, op->getParentWithTrait(); assert(scope && "Expected op to be inside automatic allocation scope"); b.setInsertionPointToStart(&scope->getRegion(0).front()); - Value res = - std_alloca(memRefMinorVectorType, ValueRange{}, b.getI64IntegerAttr(128)); + Value res = std_alloca(memRefMinorVectorType); return res; } @@ -492,8 +491,10 @@ template MemRefType VectorTransferRewriter::tmpMemRefType( TransferOpTy transfer) const { auto vectorType = transfer.getVectorType(); - return MemRefType::get(vectorType.getShape(), vectorType.getElementType(), {}, - 0); + return MemRefType::get(vectorType.getShape().drop_back(), + VectorType::get(vectorType.getShape().take_back(), + vectorType.getElementType()), + {}, 0); } /// Lowers TransferReadOp into a combination of: @@ -545,7 +546,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace mlir::edsc::op; TransferReadOp transfer = cast(op); - if (transfer.permutation_map().isMinorIdentity()) { + + // Fall back to a loop if the fastest varying stride is not 1 or it is + // permuted. + int64_t offset; + SmallVector strides; + auto successStrides = + getStridesAndOffset(transfer.getMemRefType(), strides, offset); + if (succeeded(successStrides) && strides.back() == 1 && + transfer.permutation_map().isMinorIdentity()) { // If > 1D, emit a bunch of loops around 1-D vector transfers. 
if (transfer.getVectorType().getRank() > 1) return NDTransferOpHelper(rewriter, transfer, options) @@ -575,7 +584,7 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-copy-load-dealloc. - Value tmp = std_alloc(tmpMemRefType(transfer)); + Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { @@ -584,10 +593,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( if (coalescedIdx >= 0) std::swap(ivs.back(), ivs[coalescedIdx]); // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - local(ivs) = remote(clip(transfer, memRefBoundsCapture, ivs)); + SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); + ArrayRef indicesRef(indices), ivsRef(ivs); + Value pos = + std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); + Value vector = vector_insert_element(remote(indicesRef), + local(ivsRef.drop_back()), pos); + local(ivsRef.drop_back()) = vector; }); Value vectorValue = std_load(vec); - (std_dealloc(tmp)); // vexing parse // 3. Propagate. rewriter.replaceOp(op, vectorValue); @@ -618,7 +632,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace edsc::op; TransferWriteOp transfer = cast(op); - if (transfer.permutation_map().isMinorIdentity()) { + + // Fall back to a loop if the fastest varying stride is not 1 or it is + // permuted. + int64_t offset; + SmallVector strides; + auto successStrides = + getStridesAndOffset(transfer.getMemRefType(), strides, offset); + if (succeeded(successStrides) && strides.back() == 1 && + transfer.permutation_map().isMinorIdentity()) { // If > 1D, emit a bunch of loops around 1-D vector transfers. 
if (transfer.getVectorType().getRank() > 1) return NDTransferOpHelper(rewriter, transfer, options) @@ -648,7 +670,7 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-store-copy-dealloc. - Value tmp = std_alloc(tmpMemRefType(transfer)); + Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); @@ -658,10 +680,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( if (coalescedIdx >= 0) std::swap(ivs.back(), ivs[coalescedIdx]); // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - remote(clip(transfer, memRefBoundsCapture, ivs)) = local(ivs); + SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); + ArrayRef indicesRef(indices), ivsRef(ivs); + Value pos = + std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); + Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos); + remote(indices) = scalar; }); - (std_dealloc(tmp)); // vexing parse... + // 3. Erase. rewriter.eraseOp(op); return success(); } diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp index 364168ce6e2ab..133fef4f0a3f1 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp @@ -63,7 +63,7 @@ areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, static bool isMemRefDereferencingOp(Operation &op) { // TODO: Support DMA Ops. - return isa(op); + return isa(op); } // Returns true if the individual op is loop invariant. @@ -84,10 +84,15 @@ bool isOpLoopInvariant(Operation &op, Value indVar, // TODO: Support DMA ops. return false; } else if (!isa(op)) { + // Register op in the set of ops defined inside the loop. 
This set is used + // to prevent hoisting ops that depend on other ops defined inside the loop + // which are themselves not being hoisted. + definedOps.insert(&op); + if (isMemRefDereferencingOp(op)) { - Value memref = isa(op) - ? cast(op).getMemRef() - : cast(op).getMemRef(); + Value memref = isa(op) + ? cast(op).getMemRef() + : cast(op).getMemRef(); for (auto *user : memref.getUsers()) { // If this memref has a user that is a DMA, give up because these // operations write to this memref. @@ -97,8 +102,9 @@ bool isOpLoopInvariant(Operation &op, Value indVar, // If the memref used by the load/store is used in a store elsewhere in // the loop nest, we do not hoist. Similarly, if the memref used in a // load is also being stored too, we do not hoist the load. - if (isa(user) || - (isa(user) && isa(op))) { + if (isa(user) || + (isa(user) && + isa(op))) { if (&op != user) { SmallVector userIVs; getLoopIVs(*user, &userIVs); @@ -111,9 +117,6 @@ bool isOpLoopInvariant(Operation &op, Value indVar, } } - // Insert this op in the defined ops list. - definedOps.insert(&op); - if (op.getNumOperands() == 0 && !isa(op)) { LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n"); return false; diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp index 1889711cbf7a2..5bded917978a7 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp @@ -167,8 +167,7 @@ constructTiledIndexSetHyperRect(MutableArrayRef origLoops, /// function will return failure when any dependence component is negative along /// any of `origLoops`. static LogicalResult -checkTilingLegality(MutableArrayRef origLoops, - ArrayRef tileSizes) { +checkTilingLegality(MutableArrayRef origLoops) { assert(!origLoops.empty() && "no original loops provided"); // We first find out all dependences we intend to check. 
@@ -242,7 +241,7 @@ mlir::tilePerfectlyNested(MutableArrayRef input, auto origLoops = input; // Perform tiling legality test. - if (failed(checkTilingLegality(origLoops, tileSizes))) + if (failed(checkTilingLegality(origLoops))) origLoops[0].emitRemark("tiled code is illegal due to dependences"); AffineForOp rootAffineForOp = origLoops[0]; diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 748530f033585..1de7b8957711a 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Debug.h" using namespace mlir; +using namespace vector; /// /// Implements a high-level vectorization strategy on a Function. @@ -918,6 +919,42 @@ static Value vectorizeConstant(Operation *op, ConstantOp constant, Type type) { return b.createOperation(state)->getResult(0); } +/// Returns the vector type resulting from applying the provided vectorization +/// strategy on the scalar type. +static VectorType getVectorType(Type scalarTy, + const VectorizationStrategy *strategy) { + assert(!scalarTy.isa() && "Expected scalar type"); + return VectorType::get(strategy->vectorSizes, scalarTy); +} + +/// Returns true if the provided value is vector uniform given the vectorization +/// strategy. +// TODO: For now, only values that are invariants to all the loops in the +// vectorization strategy are considered vector uniforms. +static bool isUniformDefinition(Value value, + const VectorizationStrategy *strategy) { + for (auto loopToDim : strategy->loopToVectorDim) { + auto loop = cast(loopToDim.first); + if (!loop.isDefinedOutsideOfLoop(value)) + return false; + } + return true; +} + +/// Generates a broadcast op for the provided uniform value using the +/// vectorization strategy in 'state'. 
+static Value vectorizeUniform(Value value, VectorizationState *state) { + OpBuilder builder(value.getContext()); + builder.setInsertionPointAfter(value); + + auto vectorTy = getVectorType(value.getType(), state->strategy); + auto bcast = builder.create(value.getLoc(), vectorTy, value); + + // Add broadcast to the replacement map to reuse it for other uses. + state->replacementMap[value] = bcast; + return bcast; +} + /// Tries to vectorize a given operand `op` of Operation `op` during /// def-chain propagation or during terminal vectorization, by applying the /// following logic: @@ -927,7 +964,8 @@ static Value vectorizeConstant(Operation *op, ConstantOp constant, Type type) { /// vectorize atm (i.e. broadcasting required), returns nullptr to indicate /// failure; /// 3. if the `op` is a constant, returns the vectorized form of the constant; -/// 4. non-constant scalars are currently non-vectorizable, in particular to +/// 4. if the `op` is uniform, returns a vector broadcast of the `op`; +/// 5. non-constant scalars are currently non-vectorizable, in particular to /// guard against vectorizing an index which may be loop-variant and needs /// special handling. /// @@ -963,12 +1001,15 @@ static Value vectorizeOperand(Value operand, Operation *op, return nullptr; } // 3. vectorize constant. - if (auto constant = operand.getDefiningOp()) { - return vectorizeConstant( - op, constant, - VectorType::get(state->strategy->vectorSizes, operand.getType())); - } - // 4. currently non-vectorizable. + if (auto constant = operand.getDefiningOp()) + return vectorizeConstant(op, constant, + getVectorType(operand.getType(), state->strategy)); + + // 4. Uniform values. + if (isUniformDefinition(operand, state->strategy)) + return vectorizeUniform(operand, state); + + // 5. currently non-vectorizable. 
LLVM_DEBUG(dbgs() << "-> non-vectorizable: " << operand); return nullptr; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 96d8459e5b3d4..63bd10c2e6f13 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1533,8 +1533,6 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, static LogicalResult verify(AtomicRMWOp op) { auto ptrType = op.ptr().getType().cast(); - if (!ptrType.isPointerTy()) - return op.emitOpError("expected LLVM IR pointer type for operand #0"); auto valType = op.val().getType().cast(); if (valType != ptrType.getPointerElementTy()) return op.emitOpError("expected LLVM IR element type for operand #0 to " diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index e27650b3297dd..a89287b764e5d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -440,7 +440,8 @@ LogicalResult LLVMStructType::setBody(ArrayRef types, bool isPacked) { bool LLVMStructType::isPacked() { return getImpl()->isPacked(); } bool LLVMStructType::isIdentified() { return getImpl()->isIdentified(); } bool LLVMStructType::isOpaque() { - return getImpl()->isOpaque() || !getImpl()->isInitialized(); + return getImpl()->isIdentified() && + (getImpl()->isOpaque() || !getImpl()->isInitialized()); } bool LLVMStructType::isInitialized() { return getImpl()->isInitialized(); } StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); } diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index fa45997ae801a..c9b05f89f30b1 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -905,7 +905,7 @@ static ParseResult parseTransposeOp(OpAsmParser &parser, // YieldOp //===----------------------------------------------------------------------===// -static void print(OpAsmPrinter &p, 
YieldOp op) { +static void print(OpAsmPrinter &p, linalg::YieldOp op) { p << op.getOperationName(); if (op.getNumOperands() > 0) p << ' ' << op.getOperands(); @@ -926,7 +926,8 @@ static ParseResult parseYieldOp(OpAsmParser &parser, OperationState &result) { // Check the operand number and types must match the element types of the // LinalgOp interface's shaped operands. -static LogicalResult verifyYield(YieldOp op, LinalgOp linalgOpInterface) { +static LogicalResult verifyYield(linalg::YieldOp op, + LinalgOp linalgOpInterface) { auto nOutputs = linalgOpInterface.getNumOutputs(); if (op.getNumOperands() != nOutputs) return op.emitOpError("expected number of yield values (") @@ -946,7 +947,7 @@ static LogicalResult verifyYield(YieldOp op, LinalgOp linalgOpInterface) { return success(); } -static LogicalResult verify(YieldOp op) { +static LogicalResult verify(linalg::YieldOp op) { auto *parentOp = op.getParentOp(); if (parentOp->getNumRegions() != 1 || parentOp->getRegion(0).empty()) return op.emitOpError("expected single non-empty parent region"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 6c0c841451dae..adbf4a7b80454 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -659,7 +659,7 @@ struct FuseGenericOpsOnTensors { // Add operations from producer (except the yield operation) to the fused // op. for (auto &op : producerBlock.getOperations()) { - if (auto yieldOp = dyn_cast(op)) { + if (auto yieldOp = dyn_cast(op)) { // Lookup the value the yield operation is mapped to. 
Value yieldVal = yieldOp.getOperand(0); if (Value clonedVal = mapper.lookupOrNull(yieldVal)) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 281edd9a91f64..d4d1d108be71a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -147,7 +147,7 @@ static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, } Operation &terminator = block.back(); - assert(isa(terminator) && + assert(isa(terminator) && "expected a yield op in the end of the region"); for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) { IndexedValueType O(outputBuffers[i]); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 6dc98628850f9..daaad2e6fa4be 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -243,7 +243,9 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { bool isTiled = !isZero(tileSizes[idx]); lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0)); - subViewSizes.push_back(isTiled ? tileSizes[idx] : viewSizes[idx]); + // Before composing, we need to make range a closed interval. + Value size = isTiled ? tileSizes[idx] : viewSizes[idx]; + subViewSizes.push_back(size - std_constant_index(1)); } auto *op = linalgOp.getOperation(); @@ -282,7 +284,9 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, auto m = map.getSubMap({r}); auto offset = applyMapToValues(b, loc, m, lbs).front(); offsets.push_back(offset); - auto size = applyMapToValues(b, loc, m, subViewSizes).front(); + auto closedIntSize = applyMapToValues(b, loc, m, subViewSizes).front(); + // Resulting size needs to be made half open interval again. 
+ auto size = closedIntSize + std_constant_index(1); // The size of the subview should be trimmed to avoid out-of-bounds // accesses, unless we statically know the subview size divides the view diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index c8e20ce57842b..ada89f1c82b5c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -48,14 +48,14 @@ static bool hasMultiplyAddBody(Region &r) { auto c = m_Val(r.getArgument(2)); // TODO: Update this detection once we have matcher support for specifying // that any permutation of operands matches. - auto pattern1 = m_Op(m_Op(m_Op(a, b), c)); - auto pattern2 = m_Op(m_Op(c, m_Op(a, b))); - auto pattern3 = m_Op(m_Op(m_Op(b, a), c)); - auto pattern4 = m_Op(m_Op(c, m_Op(b, a))); - auto pattern5 = m_Op(m_Op(m_Op(a, b), c)); - auto pattern6 = m_Op(m_Op(c, m_Op(a, b))); - auto pattern7 = m_Op(m_Op(m_Op(b, a), c)); - auto pattern8 = m_Op(m_Op(c, m_Op(b, a))); + auto pattern1 = m_Op(m_Op(m_Op(a, b), c)); + auto pattern2 = m_Op(m_Op(c, m_Op(a, b))); + auto pattern3 = m_Op(m_Op(m_Op(b, a), c)); + auto pattern4 = m_Op(m_Op(c, m_Op(b, a))); + auto pattern5 = m_Op(m_Op(m_Op(a, b), c)); + auto pattern6 = m_Op(m_Op(c, m_Op(a, b))); + auto pattern7 = m_Op(m_Op(m_Op(b, a), c)); + auto pattern8 = m_Op(m_Op(c, m_Op(b, a))); return pattern1.match(&r.front().back()) || pattern2.match(&r.front().back()) || pattern3.match(&r.front().back()) || diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 6f3f1e4dc0d15..498246315d642 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -38,7 +38,7 @@ struct SCFInlinerInterface : public DialectInlinerInterface { // as necessary. Required when the region has only one block. 
void handleTerminator(Operation *op, ArrayRef valuesToRepl) const final { - auto retValOp = dyn_cast(op); + auto retValOp = dyn_cast(op); if (!retValOp) return; @@ -889,7 +889,7 @@ static ParseResult parseYieldOp(OpAsmParser &parser, OperationState &result) { return success(); } -static void print(OpAsmPrinter &p, YieldOp op) { +static void print(OpAsmPrinter &p, scf::YieldOp op) { p << op.getOperationName(); if (op.getNumOperands() != 0) p << ' ' << op.getOperands() << " : " << op.getOperandTypes(); @@ -899,5 +899,9 @@ static void print(OpAsmPrinter &p, YieldOp op) { // TableGen'd op method definitions //===----------------------------------------------------------------------===// +namespace mlir { +namespace scf { #define GET_OP_CLASSES #include "mlir/Dialect/SCF/SCFOps.cpp.inc" +} // namespace scf +} // namespace mlir diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index f729752e02a00..339f588541f6e 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -468,6 +468,19 @@ static LogicalResult verifyLoadStorePtrAndValTypes(LoadStoreOpTy op, Value ptr, return success(); } +template +static LogicalResult verifyBlockReadWritePtrAndValTypes(BlockReadWriteOpTy op, + Value ptr, Value val) { + auto valType = val.getType(); + if (auto valVecTy = valType.dyn_cast()) + valType = valVecTy.getElementType(); + + if (valType != ptr.getType().cast().getPointeeType()) { + return op.emitOpError("mismatch in result type and pointer type"); + } + return success(); +} + static ParseResult parseVariableDecorations(OpAsmParser &parser, OperationState &state) { auto builtInName = llvm::convertToSnakeFromCamelCase( @@ -2025,6 +2038,93 @@ static LogicalResult verify(spirv::GroupNonUniformBallotOp ballotOp) { return success(); } +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockReadINTEL 
+//===----------------------------------------------------------------------===// + +static ParseResult parseSubgroupBlockReadINTELOp(OpAsmParser &parser, + OperationState &state) { + // Parse the storage class specification + spirv::StorageClass storageClass; + OpAsmParser::OperandType ptrInfo; + Type elementType; + if (parseEnumStrAttr(storageClass, parser) || parser.parseOperand(ptrInfo) || + parser.parseColon() || parser.parseType(elementType)) { + return failure(); + } + + auto ptrType = spirv::PointerType::get(elementType, storageClass); + if (auto valVecTy = elementType.dyn_cast()) + ptrType = spirv::PointerType::get(valVecTy.getElementType(), storageClass); + + if (parser.resolveOperand(ptrInfo, ptrType, state.operands)) { + return failure(); + } + + state.addTypes(elementType); + return success(); +} + +static void print(spirv::SubgroupBlockReadINTELOp blockReadOp, + OpAsmPrinter &printer) { + SmallVector elidedAttrs; + printer << spirv::SubgroupBlockReadINTELOp::getOperationName() << " " + << blockReadOp.ptr(); + printer << " : " << blockReadOp.getType(); +} + +static LogicalResult verify(spirv::SubgroupBlockReadINTELOp blockReadOp) { + if (failed(verifyBlockReadWritePtrAndValTypes(blockReadOp, blockReadOp.ptr(), + blockReadOp.value()))) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockWriteINTEL +//===----------------------------------------------------------------------===// + +static ParseResult parseSubgroupBlockWriteINTELOp(OpAsmParser &parser, + OperationState &state) { + // Parse the storage class specification + spirv::StorageClass storageClass; + SmallVector operandInfo; + auto loc = parser.getCurrentLocation(); + Type elementType; + if (parseEnumStrAttr(storageClass, parser) || + parser.parseOperandList(operandInfo, 2) || parser.parseColon() || + parser.parseType(elementType)) { + return failure(); + } + + auto ptrType = 
spirv::PointerType::get(elementType, storageClass); + if (auto valVecTy = elementType.dyn_cast()) + ptrType = spirv::PointerType::get(valVecTy.getElementType(), storageClass); + + if (parser.resolveOperands(operandInfo, {ptrType, elementType}, loc, + state.operands)) { + return failure(); + } + return success(); +} + +static void print(spirv::SubgroupBlockWriteINTELOp blockWriteOp, + OpAsmPrinter &printer) { + SmallVector elidedAttrs; + printer << spirv::SubgroupBlockWriteINTELOp::getOperationName() << " " + << blockWriteOp.ptr() << ", " << blockWriteOp.value(); + printer << " : " << blockWriteOp.value().getType(); +} + +static LogicalResult verify(spirv::SubgroupBlockWriteINTELOp blockWriteOp) { + if (failed(verifyBlockReadWritePtrAndValTypes( + blockWriteOp, blockWriteOp.ptr(), blockWriteOp.value()))) + return failure(); + + return success(); +} + //===----------------------------------------------------------------------===// // spv.GroupNonUniformElectOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp index b5a82487188c0..8befc6db2935b 100644 --- a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp +++ b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp @@ -38,6 +38,14 @@ spirv::TargetEnv::TargetEnv(spirv::TargetEnvAttr targetAttr) } } +spirv::DeviceType spirv::TargetEnv::getDeviceType() { + auto deviceType = spirv::symbolizeDeviceType( + targetAttr.getResourceLimits().device_type().getInt()); + if (!deviceType) + return DeviceType::Unknown; + return *deviceType; +} + spirv::Version spirv::TargetEnv::getVersion() { return targetAttr.getVersion(); } @@ -134,13 +142,16 @@ DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { spirv::ResourceLimitsAttr spirv::getDefaultResourceLimits(MLIRContext *context) { - auto i32Type = IntegerType::get(32, context); - auto v3i32Type = VectorType::get(3, i32Type); - - // These numbers are from "Table 46. 
Required Limits" of the Vulkan spec. + // All the fields have default values. Here we just provide a nicer way to + // construct a default resource limit attribute. return spirv::ResourceLimitsAttr ::get( - IntegerAttr::get(i32Type, 128), - DenseIntElementsAttr::get(v3i32Type, {128, 128, 64}), context); + /*vendor_id=*/nullptr, + /*device_id*/ nullptr, + /*device_type=*/nullptr, + /*max_compute_shared_memory_size=*/nullptr, + /*max_compute_workgroup_invocations=*/nullptr, + /*max_compute_workgroup_size=*/nullptr, + /*subgroup_size=*/nullptr, context); } StringRef spirv::getTargetEnvAttrName() { return "spv.target_env"; } diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 511ec9bf2b4e1..bcfaa896f63d2 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -779,7 +779,7 @@ void SizeToIndexOp::getCanonicalizationPatterns( // YieldOp //===----------------------------------------------------------------------===// -static LogicalResult verify(YieldOp op) { +static LogicalResult verify(shape::YieldOp op) { auto *parentOp = op.getParentOp(); auto results = parentOp->getResults(); auto operands = op.getOperands(); diff --git a/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp b/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp index a84fad1f94602..ff74ce069e407 100644 --- a/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp @@ -45,7 +45,7 @@ NumElementsOpConverter::matchAndRewrite(NumElementsOp op, OpBuilder b = OpBuilder::atBlockEnd(body); Value product = b.create(loc, valueType, body->getArgument(1), body->getArgument(2)); - b.create(loc, product); + b.create(loc, product); rewriter.replaceOp(op, reduce.result()); return success(); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index b34257791d78e..65f8b83d9a718 100644 --- 
a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1312,7 +1312,6 @@ Optional DimOp::getConstantIndex() { } static LogicalResult verify(DimOp op) { - // Assume unknown index to be in range. Optional index = op.getConstantIndex(); if (!index.hasValue()) @@ -1634,6 +1633,67 @@ LogicalResult DmaWaitOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// DynamicTensorFromElementsOp +//===----------------------------------------------------------------------===// + +static ParseResult parseDynamicTensorFromElementsOp(OpAsmParser &parser, + OperationState &result) { + // Parse operands. + SmallVector dynamicExtents; + Type indexTy = parser.getBuilder().getIndexType(); + if (parser.parseOperandList(dynamicExtents) || + parser.resolveOperands(dynamicExtents, indexTy, result.operands)) + return failure(); + + // Parse body. + Region *body = result.addRegion(); + if (parser.parseRegion(*body, {}, {})) + return failure(); + + // Parse result type. + Type resultType; + if (parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(resultType)) + return failure(); + result.addTypes(resultType); + + return success(); +} + +static void print(OpAsmPrinter &p, DynamicTensorFromElementsOp op) { + p << "dynamic_tensor_from_elements " << op.dynamicExtents(); + p.printRegion(op.body()); + p.printOptionalAttrDict(op.getAttrs()); + p << " : " << op.getType(); +} + +static LogicalResult verify(DynamicTensorFromElementsOp op) { + // Ensure that the tensor type has as many dynamic dimensions as are specified + // by the operands. + RankedTensorType resultTy = op.getType().cast(); + if (op.getNumOperands() != resultTy.getNumDynamicDims()) + return op.emitError("must have as many index operands as dynamic extents " + "in the result type"); + + // Ensure that region arguments span the index space. 
+ if (!llvm::all_of(op.body().getArgumentTypes(), + [](Type ty) { return ty.isIndex(); })) + return op.emitError("all body arguments must be index"); + if (op.body().getNumArguments() != resultTy.getRank()) + return op.emitError("must have one body argument per input dimension"); + + // Ensure that the region yields an element of the right type. + auto yieldOp = + llvm::cast(op.body().getBlocks().front().getTerminator()); + if (yieldOp.value().getType() != resultTy.getElementType()) + return op.emitOpError( + "body must be terminated with a `yield` operation of the tensor " + "element type"); + + return success(); +} + //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 7fa62ea34de19..d00e56297532c 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -537,6 +537,18 @@ Optional> ContractionOp::getShapeForUnroll() { // ExtractElementOp //===----------------------------------------------------------------------===// +void vector::ExtractElementOp::build(OpBuilder &builder, OperationState &result, + Value source, Value position) { + result.addOperands({source, position}); + result.addTypes(source.getType().cast().getElementType()); +} + +void vector::ExtractElementOp::build(OpBuilder &builder, OperationState &result, + Value source, int64_t position) { + Value pos = builder.create(result.location, position, 32); + build(builder, result, source, pos); +} + static LogicalResult verify(vector::ExtractElementOp op) { VectorType vectorType = op.getVectorType(); if (vectorType.getRank() != 1) @@ -1007,6 +1019,18 @@ static ParseResult parseShuffleOp(OpAsmParser &parser, OperationState &result) { // InsertElementOp //===----------------------------------------------------------------------===// +void 
InsertElementOp::build(OpBuilder &builder, OperationState &result, + Value source, Value dest, Value position) { + result.addOperands({source, dest, position}); + result.addTypes(dest.getType()); +} + +void InsertElementOp::build(OpBuilder &builder, OperationState &result, + Value source, Value dest, int64_t position) { + Value pos = builder.create(result.location, position, 32); + build(builder, result, source, dest, pos); +} + static LogicalResult verify(InsertElementOp op) { auto dstVectorType = op.getDestVectorType(); if (dstVectorType.getRank() != 1) diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 16d10e558b5eb..332bfbe2f4577 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -1347,7 +1347,8 @@ class ConstantMaskOpLowering : public OpRewritePattern { auto eltType = dstType.getElementType(); auto dimSizes = op.mask_dim_sizes(); int64_t rank = dimSizes.size(); - int64_t trueDim = dimSizes[0].cast().getInt(); + int64_t trueDim = std::min(dstType.getDimSize(0), + dimSizes[0].cast().getInt()); if (rank == 1) { // Express constant 1-D case in explicit vector form: @@ -1402,21 +1403,8 @@ class CreateMaskOpLowering : public OpRewritePattern { int64_t rank = dstType.getRank(); Value idx = op.getOperand(0); - if (rank == 1) { - // Express dynamic 1-D case in explicit vector form: - // mask = [0,1,..,n-1] < [a,a,..,a] - SmallVector values(dim); - for (int64_t d = 0; d < dim; d++) - values[d] = d; - Value indices = - rewriter.create(loc, rewriter.getI64VectorAttr(values)); - Value bound = - rewriter.create(loc, rewriter.getI64Type(), idx); - Value bounds = rewriter.create(loc, indices.getType(), bound); - rewriter.replaceOpWithNewOp(op, CmpIPredicate::slt, indices, - bounds); - return success(); - } + if (rank == 1) + return failure(); // leave for lowering VectorType lowType = VectorType::get(dstType.getShape().drop_front(), eltType); diff --git 
a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 67658a9ca33a1..a6246024a5aed 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -668,6 +668,25 @@ const AbstractOperation *AbstractOperation::lookup(StringRef opName, return nullptr; } +AbstractOperation::AbstractOperation( + StringRef name, Dialect &dialect, OperationProperties opProperties, + TypeID typeID, + ParseResult (&parseAssembly)(OpAsmParser &parser, OperationState &result), + void (&printAssembly)(Operation *op, OpAsmPrinter &p), + LogicalResult (&verifyInvariants)(Operation *op), + LogicalResult (&foldHook)(Operation *op, ArrayRef operands, + SmallVectorImpl &results), + void (&getCanonicalizationPatterns)(OwningRewritePatternList &results, + MLIRContext *context), + detail::InterfaceMap &&interfaceMap, bool (&hasTrait)(TypeID traitID)) + : name(Identifier::get(name, dialect.getContext())), dialect(dialect), + typeID(typeID), parseAssembly(parseAssembly), + printAssembly(printAssembly), verifyInvariants(verifyInvariants), + foldHook(foldHook), + getCanonicalizationPatterns(getCanonicalizationPatterns), + opProperties(opProperties), interfaceMap(std::move(interfaceMap)), + hasRawTrait(hasTrait) {} + /// Get the dialect that registered the type with the provided typeid. const AbstractType &AbstractType::lookup(TypeID typeID, MLIRContext *context) { auto &impl = context->getImpl(); diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index 67249b83b1047..b8f9e6c9fdfc4 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -45,11 +45,16 @@ StringRef OperationName::stripDialect() const { return splitName.second.empty() ? splitName.first : splitName.second; } -/// Return the name of this operation. This always succeeds. +/// Return the name of this operation. This always succeeds. StringRef OperationName::getStringRef() const { + return getIdentifier().strref(); +} + +/// Return the name of this operation as an identifier. 
This always succeeds. +Identifier OperationName::getIdentifier() const { if (auto *op = representation.dyn_cast()) return op->name; - return representation.get().strref(); + return representation.get(); } const AbstractOperation *OperationName::getAbstractOperation() const { diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index d6065f758fc13..48651a98561cf 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -863,8 +863,8 @@ class CustomOpAsmParser : public OpAsmParser { /// Emit a diagnostic at the specified location and return failure. InFlightDiagnostic emitError(llvm::SMLoc loc, const Twine &message) override { emittedError = true; - return parser.emitError(loc, "custom op '" + opDefinition->name + "' " + - message); + return parser.emitError(loc, "custom op '" + opDefinition->name.strref() + + "' " + message); } llvm::SMLoc getCurrentLocation() override { diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index bb521633b5f3f..3ac41cde7911b 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -92,7 +92,9 @@ void VerifierPass::runOnOperation() { namespace mlir { namespace detail { struct OpPassManagerImpl { - OpPassManagerImpl(OperationName name, bool verifyPasses) + OpPassManagerImpl(Identifier identifier, bool verifyPasses) + : name(identifier), identifier(identifier), verifyPasses(verifyPasses) {} + OpPassManagerImpl(StringRef name, bool verifyPasses) : name(name), verifyPasses(verifyPasses) {} /// Merge the passes of this pass manager into the one provided. @@ -100,10 +102,8 @@ struct OpPassManagerImpl { /// Nest a new operation pass manager for the given operation kind under this /// pass manager. - OpPassManager &nest(const OperationName &nestedName); - OpPassManager &nest(StringRef nestedName) { - return nest(OperationName(nestedName, getContext())); - } + OpPassManager &nest(Identifier nestedName); + OpPassManager &nest(StringRef nestedName); /// Add the given pass to this pass manager. 
If this pass has a concrete /// operation type, it must be the same type as this pass manager. @@ -117,13 +117,18 @@ struct OpPassManagerImpl { /// pass. void splitAdaptorPasses(); - /// Return an instance of the context. - MLIRContext *getContext() const { - return name.getAbstractOperation()->dialect.getContext(); + Identifier getOpName(MLIRContext &context) { + if (!identifier) + identifier = Identifier::get(name, &context); + return *identifier; } /// The name of the operation that passes of this pass manager operate on. - OperationName name; + StringRef name; + + /// The cached identifier (internalized in the context) for the name of the + /// operation that passes of this pass manager operate on. + Optional identifier; /// Flag that specifies if the IR should be verified after each pass has run. bool verifyPasses : 1; @@ -141,7 +146,14 @@ void OpPassManagerImpl::mergeInto(OpPassManagerImpl &rhs) { passes.clear(); } -OpPassManager &OpPassManagerImpl::nest(const OperationName &nestedName) { +OpPassManager &OpPassManagerImpl::nest(Identifier nestedName) { + OpPassManager nested(nestedName, verifyPasses); + auto *adaptor = new OpToOpPassAdaptor(std::move(nested)); + addPass(std::unique_ptr(adaptor)); + return adaptor->getPassManagers().front(); +} + +OpPassManager &OpPassManagerImpl::nest(StringRef nestedName) { OpPassManager nested(nestedName, verifyPasses); auto *adaptor = new OpToOpPassAdaptor(std::move(nested)); addPass(std::unique_ptr(adaptor)); @@ -152,7 +164,7 @@ void OpPassManagerImpl::addPass(std::unique_ptr pass) { // If this pass runs on a different operation than this pass manager, then // implicitly nest a pass manager for this operation. 
auto passOpName = pass->getOpName(); - if (passOpName && passOpName != name.getStringRef()) + if (passOpName && passOpName != name) return nest(*passOpName).addPass(std::move(pass)); passes.emplace_back(std::move(pass)); @@ -239,15 +251,10 @@ void OpPassManagerImpl::splitAdaptorPasses() { // OpPassManager //===----------------------------------------------------------------------===// -OpPassManager::OpPassManager(OperationName name, bool verifyPasses) - : impl(new OpPassManagerImpl(name, verifyPasses)) { - assert(name.getAbstractOperation() && - "OpPassManager can only operate on registered operations"); - assert(name.getAbstractOperation()->hasProperty( - OperationProperty::IsolatedFromAbove) && - "OpPassManager only supports operating on operations marked as " - "'IsolatedFromAbove'"); -} +OpPassManager::OpPassManager(Identifier name, bool verifyPasses) + : impl(new OpPassManagerImpl(name, verifyPasses)) {} +OpPassManager::OpPassManager(StringRef name, bool verifyPasses) + : impl(new OpPassManagerImpl(name, verifyPasses)) {} OpPassManager::OpPassManager(OpPassManager &&rhs) : impl(std::move(rhs.impl)) {} OpPassManager::OpPassManager(const OpPassManager &rhs) { *this = rhs; } OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { @@ -275,7 +282,7 @@ OpPassManager::const_pass_iterator OpPassManager::end() const { /// Nest a new operation pass manager for the given operation kind under this /// pass manager. -OpPassManager &OpPassManager::nest(const OperationName &nestedName) { +OpPassManager &OpPassManager::nest(Identifier nestedName) { return impl->nest(nestedName); } OpPassManager &OpPassManager::nest(StringRef nestedName) { @@ -294,11 +301,13 @@ size_t OpPassManager::size() const { return impl->passes.size(); } /// Returns the internal implementation instance. OpPassManagerImpl &OpPassManager::getImpl() { return *impl; } -/// Return an instance of the context. 
-MLIRContext *OpPassManager::getContext() const { return impl->getContext(); } +/// Return the operation name that this pass manager operates on. +StringRef OpPassManager::getOpName() const { return impl->name; } /// Return the operation name that this pass manager operates on. -const OperationName &OpPassManager::getOpName() const { return impl->name; } +Identifier OpPassManager::getOpName(MLIRContext &context) const { + return impl->getOpName(context); +} /// Prints out the given passes as the textual representation of a pipeline. static void printAsTextualPipeline(ArrayRef> passes, @@ -336,6 +345,14 @@ void OpPassManager::getDependentDialects(DialectRegistry &dialects) const { LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op, AnalysisManager am) { + if (!op->getName().getAbstractOperation()) + return op->emitOpError() + << "trying to schedule a pass on an unregistered operation"; + if (!op->getName().getAbstractOperation()->hasProperty( + OperationProperty::IsolatedFromAbove)) + return op->emitOpError() << "trying to schedule a pass on an operation not " + "marked as 'IsolatedFromAbove'"; + pass->passState.emplace(op, am); // Instrument before the pass has run. @@ -385,12 +402,22 @@ LogicalResult OpToOpPassAdaptor::runPipeline( /// Find an operation pass manager that can operate on an operation of the given /// type, or nullptr if one does not exist. static OpPassManager *findPassManagerFor(MutableArrayRef mgrs, - const OperationName &name) { + StringRef name) { auto it = llvm::find_if( mgrs, [&](OpPassManager &mgr) { return mgr.getOpName() == name; }); return it == mgrs.end() ? nullptr : &*it; } +/// Find an operation pass manager that can operate on an operation of the given +/// type, or nullptr if one does not exist. 
+static OpPassManager *findPassManagerFor(MutableArrayRef mgrs, + Identifier name, + MLIRContext &context) { + auto it = llvm::find_if( + mgrs, [&](OpPassManager &mgr) { return mgr.getOpName(context) == name; }); + return it == mgrs.end() ? nullptr : &*it; +} + OpToOpPassAdaptor::OpToOpPassAdaptor(OpPassManager &&mgr) { mgrs.emplace_back(std::move(mgr)); } @@ -417,8 +444,7 @@ void OpToOpPassAdaptor::mergeInto(OpToOpPassAdaptor &rhs) { // After coalescing, sort the pass managers within rhs by name. llvm::array_pod_sort(rhs.mgrs.begin(), rhs.mgrs.end(), [](const OpPassManager *lhs, const OpPassManager *rhs) { - return lhs->getOpName().getStringRef().compare( - rhs->getOpName().getStringRef()); + return lhs->getOpName().compare(rhs->getOpName()); }); } @@ -450,16 +476,18 @@ void OpToOpPassAdaptor::runOnOperationImpl() { for (auto ®ion : getOperation()->getRegions()) { for (auto &block : region) { for (auto &op : block) { - auto *mgr = findPassManagerFor(mgrs, op.getName()); + auto *mgr = findPassManagerFor(mgrs, op.getName().getIdentifier(), + *op.getContext()); if (!mgr) continue; + Identifier opName = mgr->getOpName(*getOperation()->getContext()); // Run the held pipeline over the current operation. if (instrumentor) - instrumentor->runBeforePipeline(mgr->getOpName(), parentInfo); + instrumentor->runBeforePipeline(opName, parentInfo); auto result = runPipeline(mgr->getPasses(), &op, am.nest(&op)); if (instrumentor) - instrumentor->runAfterPipeline(mgr->getOpName(), parentInfo); + instrumentor->runAfterPipeline(opName, parentInfo); if (failed(result)) return signalPassFailure(); @@ -494,8 +522,9 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl() { for (auto ®ion : getOperation()->getRegions()) { for (auto &block : region) { for (auto &op : block) { - // Add this operation iff the name matches the any of the pass managers. - if (findPassManagerFor(mgrs, op.getName())) + // Add this operation iff the name matches any of the pass managers. 
+ if (findPassManagerFor(mgrs, op.getName().getIdentifier(), + getContext())) opAMPairs.emplace_back(&op, am.nest(&op)); } } @@ -531,15 +560,17 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl() { // Get the pass manager for this operation and execute it. auto &it = opAMPairs[nextID]; - auto *pm = findPassManagerFor(pms, it.first->getName()); + auto *pm = findPassManagerFor( + pms, it.first->getName().getIdentifier(), getContext()); assert(pm && "expected valid pass manager for operation"); + Identifier opName = pm->getOpName(*getOperation()->getContext()); if (instrumentor) - instrumentor->runBeforePipeline(pm->getOpName(), parentInfo); + instrumentor->runBeforePipeline(opName, parentInfo); auto pipelineResult = runPipeline(pm->getPasses(), it.first, it.second); if (instrumentor) - instrumentor->runAfterPipeline(pm->getOpName(), parentInfo); + instrumentor->runAfterPipeline(opName, parentInfo); // Drop this thread from being tracked by the diagnostic handler. // After this task has finished, the thread may be used outside of @@ -732,9 +763,9 @@ PassManager::runWithCrashRecovery(MutableArrayRef> passes, //===----------------------------------------------------------------------===// PassManager::PassManager(MLIRContext *ctx, bool verifyPasses) - : OpPassManager(OperationName(ModuleOp::getOperationName(), ctx), + : OpPassManager(Identifier::get(ModuleOp::getOperationName(), ctx), verifyPasses), - passTiming(false), localReproducer(false) {} + context(ctx), passTiming(false), localReproducer(false) {} PassManager::~PassManager() {} @@ -870,7 +901,7 @@ PassInstrumentor::~PassInstrumentor() {} /// See PassInstrumentation::runBeforePipeline for details. 
void PassInstrumentor::runBeforePipeline( - const OperationName &name, + Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo) { llvm::sys::SmartScopedLock instrumentationLock(impl->mutex); for (auto &instr : impl->instrumentations) @@ -879,7 +910,7 @@ void PassInstrumentor::runBeforePipeline( /// See PassInstrumentation::runAfterPipeline for details. void PassInstrumentor::runAfterPipeline( - const OperationName &name, + Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo) { llvm::sys::SmartScopedLock instrumentationLock(impl->mutex); for (auto &instr : llvm::reverse(impl->instrumentations)) diff --git a/mlir/lib/Pass/PassStatistics.cpp b/mlir/lib/Pass/PassStatistics.cpp index 6ef0d3bbea6a8..d909c98abf563 100644 --- a/mlir/lib/Pass/PassStatistics.cpp +++ b/mlir/lib/Pass/PassStatistics.cpp @@ -116,7 +116,7 @@ static void printResultsAsPipeline(raw_ostream &os, OpPassManager &pm) { // Print each of the children passes. for (OpPassManager &mgr : mgrs) { - auto name = ("'" + mgr.getOpName().getStringRef() + "' Pipeline").str(); + auto name = ("'" + mgr.getOpName() + "' Pipeline").str(); printPassEntry(os, indent, name); for (Pass &pass : mgr.getPasses()) printPass(indent + 2, &pass); diff --git a/mlir/lib/Pass/PassTiming.cpp b/mlir/lib/Pass/PassTiming.cpp index 71bf822a864bc..e3978751c11ca 100644 --- a/mlir/lib/Pass/PassTiming.cpp +++ b/mlir/lib/Pass/PassTiming.cpp @@ -165,9 +165,9 @@ struct PassTiming : public PassInstrumentation { ~PassTiming() override { print(); } /// Setup the instrumentation hooks. 
- void runBeforePipeline(const OperationName &name, + void runBeforePipeline(Identifier name, const PipelineParentInfo &parentInfo) override; - void runAfterPipeline(const OperationName &name, + void runAfterPipeline(Identifier name, const PipelineParentInfo &parentInfo) override; void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); } void runAfterPass(Pass *pass, Operation *) override; @@ -242,15 +242,15 @@ struct PassTiming : public PassInstrumentation { }; } // end anonymous namespace -void PassTiming::runBeforePipeline(const OperationName &name, +void PassTiming::runBeforePipeline(Identifier name, const PipelineParentInfo &parentInfo) { // We don't actually want to time the pipelines, they gather their total // from their held passes. getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline, - [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); }); + [&] { return ("'" + name.strref() + "' Pipeline").str(); }); } -void PassTiming::runAfterPipeline(const OperationName &name, +void PassTiming::runAfterPipeline(Identifier name, const PipelineParentInfo &parentInfo) { // Pop the timer for the pipeline. auto tid = llvm::get_threadid(); diff --git a/mlir/test/Bindings/Python/ir_types.py b/mlir/test/Bindings/Python/ir_types.py index a8f3a3840497a..4710bee27e37e 100644 --- a/mlir/test/Bindings/Python/ir_types.py +++ b/mlir/test/Bindings/Python/ir_types.py @@ -177,25 +177,200 @@ def testComplexType(): run(testComplexType) +# CHECK-LABEL: TEST: testConcreteShapedType +# Shaped type is not a kind of standard types, it is the base class for +# vectors, memrefs and tensors, so this test case uses an instance of vector +# to test the shaped type. The class hierarchy is preserved on the python side. 
+def testConcreteShapedType(): + ctx = mlir.ir.Context() + vector = mlir.ir.VectorType(ctx.parse_type("vector<2x3xf32>")) + # CHECK: element type: f32 + print("element type:", vector.element_type) + # CHECK: whether the given shaped type is ranked: True + print("whether the given shaped type is ranked:", vector.has_rank) + # CHECK: rank: 2 + print("rank:", vector.rank) + # CHECK: whether the shaped type has a static shape: True + print("whether the shaped type has a static shape:", vector.has_static_shape) + # CHECK: whether the dim-th dimension is dynamic: False + print("whether the dim-th dimension is dynamic:", vector.is_dynamic_dim(0)) + # CHECK: dim size: 3 + print("dim size:", vector.get_dim_size(1)) + # CHECK: is_dynamic_size: False + print("is_dynamic_size:", vector.is_dynamic_size(3)) + # CHECK: is_dynamic_stride_or_offset: False + print("is_dynamic_stride_or_offset:", vector.is_dynamic_stride_or_offset(1)) + # CHECK: isinstance(ShapedType): True + print("isinstance(ShapedType):", isinstance(vector, mlir.ir.ShapedType)) + +run(testConcreteShapedType) + +# CHECK-LABEL: TEST: testAbstractShapedType +# Tests that ShapedType operates as an abstract base class of a concrete +# shaped type (using vector as an example). 
+def testAbstractShapedType(): + ctx = mlir.ir.Context() + vector = mlir.ir.ShapedType(ctx.parse_type("vector<2x3xf32>")) + # CHECK: element type: f32 + print("element type:", vector.element_type) + +run(testAbstractShapedType) + # CHECK-LABEL: TEST: testVectorType def testVectorType(): ctx = mlir.ir.Context() f32 = mlir.ir.F32Type(ctx) shape = [2, 3] + loc = ctx.get_unknown_location() # CHECK: vector type: vector<2x3xf32> - print("vector type:", mlir.ir.VectorType.get_vector(shape, f32)) + print("vector type:", mlir.ir.VectorType.get_vector(shape, f32, loc)) - index = mlir.ir.IndexType(ctx) + none = mlir.ir.NoneType(ctx) try: - vector_invalid = mlir.ir.VectorType.get_vector(shape, index) + vector_invalid = mlir.ir.VectorType.get_vector(shape, none, loc) except ValueError as e: - # CHECK: invalid 'Type(index)' and expected floating point or integer type. + # CHECK: invalid 'Type(none)' and expected floating point or integer type. print(e) else: print("Exception not produced") run(testVectorType) +# CHECK-LABEL: TEST: testRankedTensorType +def testRankedTensorType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + shape = [2, 3] + loc = ctx.get_unknown_location() + # CHECK: ranked tensor type: tensor<2x3xf32> + print("ranked tensor type:", + mlir.ir.RankedTensorType.get_ranked_tensor(shape, f32, loc)) + + none = mlir.ir.NoneType(ctx) + try: + tensor_invalid = mlir.ir.RankedTensorType.get_ranked_tensor(shape, none, + loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. 
+ print(e) + else: + print("Exception not produced") + +run(testRankedTensorType) + +# CHECK-LABEL: TEST: testUnrankedTensorType +def testUnrankedTensorType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + loc = ctx.get_unknown_location() + unranked_tensor = mlir.ir.UnrankedTensorType.get_unranked_tensor(f32, loc) + # CHECK: unranked tensor type: tensor<*xf32> + print("unranked tensor type:", unranked_tensor) + try: + invalid_rank = unranked_tensor.rank + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_is_dynamic_dim = unranked_tensor.is_dynamic_dim(0) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_get_dim_size = unranked_tensor.get_dim_size(1) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + + none = mlir.ir.NoneType(ctx) + try: + tensor_invalid = mlir.ir.UnrankedTensorType.get_unranked_tensor(none, loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. 
+ print(e) + else: + print("Exception not produced") + +run(testUnrankedTensorType) + +# CHECK-LABEL: TEST: testMemRefType +def testMemRefType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + shape = [2, 3] + loc = ctx.get_unknown_location() + memref = mlir.ir.MemRefType.get_contiguous_memref(f32, shape, 2, loc) + # CHECK: memref type: memref<2x3xf32, 2> + print("memref type:", memref) + # CHECK: number of affine layout maps: 0 + print("number of affine layout maps:", memref.num_affine_maps) + # CHECK: memory space: 2 + print("memory space:", memref.memory_space) + + none = mlir.ir.NoneType(ctx) + try: + memref_invalid = mlir.ir.MemRefType.get_contiguous_memref(none, shape, 2, + loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. + print(e) + else: + print("Exception not produced") + +run(testMemRefType) + +# CHECK-LABEL: TEST: testUnrankedMemRefType +def testUnrankedMemRefType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + loc = ctx.get_unknown_location() + unranked_memref = mlir.ir.UnrankedMemRefType.get_unranked_memref(f32, 2, loc) + # CHECK: unranked memref type: memref<*xf32, 2> + print("unranked memref type:", unranked_memref) + try: + invalid_rank = unranked_memref.rank + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_is_dynamic_dim = unranked_memref.is_dynamic_dim(0) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_get_dim_size = unranked_memref.get_dim_size(1) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. 
+ print(e) + else: + print("Exception not produced") + + none = mlir.ir.NoneType(ctx) + try: + memref_invalid = mlir.ir.UnrankedMemRefType.get_unranked_memref(none, 2, + loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. + print(e) + else: + print("Exception not produced") + +run(testUnrankedMemRefType) + # CHECK-LABEL: TEST: testTupleType def testTupleType(): ctx = mlir.ir.Context() diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir index b7e11d74996bd..9651946118a67 100644 --- a/mlir/test/Conversion/GPUToSPIRV/if.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir @@ -3,9 +3,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @main(%arg0 : memref<10xf32>, %arg1 : i1) { %c0 = constant 1 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir index da57db15bedce..b9ae8bdfeacdc 100644 --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -3,9 +3,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @load_store(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>) { %c0 = constant 0 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir index 2205c60f875f5..c181e1956f83a 100644 --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -3,9 +3,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - 
{max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @loop(%arg0 : memref<10xf32>, %arg1 : memref<10xf32>) { %c0 = constant 1 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir b/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir index 1b5b4d52d8b88..0e2a45f9bf3cb 100644 --- a/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir @@ -2,10 +2,7 @@ module attributes { gpu.container_module, - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { gpu.module @kernels { // CHECK-LABEL: spv.module @{{.*}} Physical64 OpenCL diff --git a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir index cebd541977ef1..d437ab160b927 100644 --- a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir +++ b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir @@ -16,11 +16,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { // CHECK: spv.globalVariable @@ -78,11 +74,7 @@ func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) { // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} @@ -111,11 +103,7 @@ func @single_workgroup_reduction(%input: memref<16xi32>, 
%output: memref<1xi32>) module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) attributes { spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>} @@ -146,11 +134,7 @@ func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { func @single_workgroup_reduction(%input: memref<16x8xi32>, %output: memref<16xi32>) attributes { spv.entry_point_abi = {local_size = dense<[16, 8, 1]>: vector<3xi32>} diff --git a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir deleted file mode 100644 index cc384496dff05..0000000000000 --- a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir +++ /dev/null @@ -1,132 +0,0 @@ -// RUN: mlir-opt -convert-shape-to-scf -split-input-file %s | FileCheck %s - -// CHECK-LABEL: @shape_reduce -// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index -func @shape_reduce(%shape : tensor) -> index { - %init = constant 1 : index - %num_elements = shape.reduce(%shape, %init) : tensor -> index { - ^bb0(%index : index, %extent : index, %acc: index): - %new_acc = muli %acc, %extent : index - shape.yield %new_acc : index - } - return %num_elements : index -} -// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index -// CHECK-NEXT: %[[C0:.*]] = constant 0 : index -// CHECK-NEXT: %[[C1:.*]] = constant 1 : index -// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor -// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) -// CHECK-NEXT: %[[EXTENT:.*]] = 
extract_element %[[SHAPE]][%[[I]]] -// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index -// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index -// CHECK-NEXT: } -// CHECK-NEXT: return %[[RESULT]] : index - -// ----- - -// Don't lower `shape_of` for result type of `shape.shape`. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of(%arg : tensor<*xf32>) { - // CHECK: shape.shape - %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for unranked tensors. -// CHECK-LABEL: @shape_of_unranked -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of_unranked(%arg : tensor<*xf32>) { - // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref - %shape = shape.shape_of %arg : tensor<*xf32> -> tensor - return -} - -// ----- - -// CHECK-LABEL: @shape_eq -// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 -func @shape_eq(%a : tensor, %b : tensor) -> i1 { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor - // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor - // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] - // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[INIT:.*]] = constant true - // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { - // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor - // CHECK: %[[EXTENT_B:.*]] = 
extract_element %[[B]][%[[I]]] : tensor - // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] - // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] - // CHECK: scf.yield %[[CONJ_NEXT]] : i1 - // CHECK: } - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } else { - // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } - // CHECK: return %[[SHAPE_EQ]] : i1 - %result = shape.shape_eq %a, %b : tensor, tensor - return %result : i1 -} - -// ----- - -// Don't lower `shape.broadcast` if a `shape.shape` type is involved. -// CHECK-LABEL: @broadcast -func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { - // CHECK: shape.broadcast - %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape - return %c : !shape.shape -} - -// ----- - -// CHECK-LABEL: @broadcast -// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) -func @broadcast(%a : tensor, %b : tensor) { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor - // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor - // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] - // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { - // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor - // CHECK: } else { - // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor - // CHECK: } - // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref - // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index - // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { - // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 
step %[[C1]] { - // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index - // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { - // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index - // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor - // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index - // CHECK: } else { - // CHECK: scf.yield %[[GREATER_OPERAND_EXTENT]] : index - // CHECK: } - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref - %0 = shape.broadcast %a, %b - : tensor, tensor -> tensor - return -} - diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index b0fb5bac9071b..bf8e74e5143ed 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -26,46 +26,6 @@ func @binary_ops_on_size(%lhs : !shape.size, %rhs : !shape.size) { // ----- -// Don't lower `shape_of` with `shape.shape` type. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for statically shaped tensor. 
-// CHECK-LABEL: @shape_of_stat -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor - return -} - -// ----- - -// Lower `shape_of` for dynamically shaped tensor. -// CHECK-LABEL: @shape_of_dyn -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) -func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C5:.*]] = constant 5 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor - return -} - -// ----- - // Convert `rank` to `dim` of the first dimension. 
// CHECK-LABEL: @rank // CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index @@ -190,3 +150,174 @@ func @to_extent_tensor(%arg: tensor) -> tensor<3xindex> { // CHECK: return %[[RES]] return %casted : tensor<3xindex> } + +// CHECK-LABEL: @shape_reduce +// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index +func @shape_reduce(%shape : tensor) -> index { + %init = constant 1 : index + %num_elements = shape.reduce(%shape, %init) : tensor -> index { + ^bb0(%index : index, %extent : index, %acc: index): + %new_acc = muli %acc, %extent : index + shape.yield %new_acc : index + } + return %num_elements : index +} +// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index +// CHECK-NEXT: %[[C0:.*]] = constant 0 : index +// CHECK-NEXT: %[[C1:.*]] = constant 1 : index +// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor +// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) +// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] +// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index +// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index +// CHECK-NEXT: } +// CHECK-NEXT: return %[[RESULT]] : index + +// ----- + +// Don't lower `shape_of` for result type of `shape.shape`. +// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of(%arg : tensor<*xf32>) { + // CHECK: shape.shape + %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for unranked tensors. 
+// CHECK-LABEL: @shape_of_unranked +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of_unranked(%arg : tensor<*xf32>) { + // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> + // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { + // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref + // CHECK: } + // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + %shape = shape.shape_of %arg : tensor<*xf32> -> tensor + return +} + +// ----- + +// Don't lower `shape_of` with `shape.shape` type. +// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for statically shaped tensor. +// CHECK-LABEL: @shape_of_stat +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[C3:.*]] = constant 3 : index + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor + return +} + +// ----- + +// Lower `shape_of` for dynamically shaped tensor. 
+// CHECK-LABEL: @shape_of_dyn +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) +func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C5:.*]] = constant 5 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor + return +} + +// ----- + +// CHECK-LABEL: @shape_eq +// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 +func @shape_eq(%a : tensor, %b : tensor) -> i1 { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor + // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor + // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] + // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[INIT:.*]] = constant true + // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { + // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor + // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor + // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] + // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] + // CHECK: scf.yield %[[CONJ_NEXT]] : i1 + // CHECK: } + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } else { + // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } + // CHECK: return %[[SHAPE_EQ]] : i1 + %result = shape.shape_eq %a, %b : tensor, tensor + return %result : i1 +} + +// ----- + +// Don't lower `shape.broadcast` if a `shape.shape` type is involved. 
+// CHECK-LABEL: @broadcast +func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { + // CHECK: shape.broadcast + %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape + return %c : !shape.shape +} + +// ----- + +// CHECK-LABEL: @broadcast +// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) +func @broadcast(%a : tensor, %b : tensor) { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor + // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor + // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] + // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { + // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor + // CHECK: } else { + // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor + // CHECK: } + // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref + // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index + // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { + // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { + // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index + // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { + // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index + // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor + // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index + // CHECK: } else { + // CHECK: scf.yield %[[GREATER_OPERAND_EXTENT]] : index + // CHECK: 
} + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref + %0 = shape.broadcast %a, %b + : tensor, tensor -> tensor + return +} + diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir index b428d37a36167..5cccca3795b3b 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir @@ -130,8 +130,7 @@ func @aligned_1d_alloc() -> memref<42xf32> { // CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 -// CHECK-NEXT: %[[alignmentMinus1:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 -// CHECK-NEXT: %[[allocsize:.*]] = llvm.sub %[[alignmentMinus1]], %[[one_1]] : !llvm.i64 +// CHECK-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> @@ -154,8 +153,7 @@ func @aligned_1d_alloc() -> memref<42xf32> { // BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[alignmentMinus1:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 -// BAREPTR-NEXT: %[[allocsize:.*]] = llvm.sub %[[alignmentMinus1]], %[[one_1]] : !llvm.i64 +// BAREPTR-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr // BAREPTR-NEXT: 
%[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> diff --git a/mlir/test/Conversion/StandardToSPIRV/alloc.mlir b/mlir/test/Conversion/StandardToSPIRV/alloc.mlir index 14ce4699a4550..ccd8c02e255ac 100644 --- a/mlir/test/Conversion/StandardToSPIRV/alloc.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/alloc.mlir @@ -6,9 +6,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_workgroup_mem(%arg0 : index, %arg1 : index) { @@ -34,9 +32,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_workgroup_mem(%arg0 : index, %arg1 : index) { @@ -65,9 +61,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @two_allocs() { @@ -88,9 +82,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @two_allocs_vector() { @@ -112,9 +104,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_dynamic_workgroup_mem(%arg0 : index) { @@ -129,9 +119,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - 
{max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_mem() { @@ -146,9 +134,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_dynamic_workgroup_mem(%arg0 : memref<4x?xf32, 3>) { @@ -163,9 +149,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_mem(%arg0 : memref<4x5xf32>) { diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir index 1b83af1be7551..ce38ba8b3f5e6 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -6,9 +6,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // Check integer operation conversions. @@ -146,10 +144,7 @@ func @unsupported_2x2elem_vector(%arg0: vector<2x2xi32>) { // Check that types are converted to 32-bit when no special capabilities. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @int_vector23 @@ -177,10 +172,7 @@ func @float_scalar(%arg0: f16, %arg1: f64) { // Check that types are converted to 32-bit when no special capabilities that // are not supported. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { func @int_vector4_invalid(%arg0: vector<4xi64>) { @@ -199,10 +191,7 @@ func @int_vector4_invalid(%arg0: vector<4xi64>) { //===----------------------------------------------------------------------===// module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @bitwise_scalar @@ -348,9 +337,7 @@ func @boolcmpi(%arg0 : i1, %arg1 : i1) { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: @constant @@ -412,10 +399,7 @@ func @constant_64bit() { // Check that constants are converted to 32-bit when no special capability. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @constant_16bit @@ -498,9 +482,7 @@ func @unsupported_cases() { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: index_cast1 @@ -631,10 +613,7 @@ func @fptosi2(%arg0 : f16) -> i16 { // Checks that cast types will be adjusted when no special capabilities for // non-32-bit scalar types. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @fpext1 @@ -682,9 +661,8 @@ func @sitofp(%arg0 : i64) { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { //===----------------------------------------------------------------------===// @@ -750,9 +728,7 @@ func @load_store_zero_rank_int(%arg0: memref, %arg1: memref) { // TODO: Test i1 and i64 types. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: @load_i8 @@ -895,9 +871,7 @@ func @store_f32(%arg0: memref, %value: f32) { module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: @load_i8 diff --git a/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir index 5ea44c18c6183..66b2ba97bea1e 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir @@ -7,10 +7,7 @@ // Check that non-32-bit integer types are converted to 32-bit types if the // corresponding capabilities are not available. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @integer8 @@ -38,10 +35,7 @@ func @integer64(%arg0: i64, %arg1: si64, %arg2: ui64) { return } // Check that non-32-bit integer types are kept untouched if the corresponding // capabilities are available. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @integer8 @@ -68,10 +62,7 @@ func @integer64(%arg0: i64, %arg1: si64, %arg2: ui64) { return } // Check that weird bitwidths are not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-NOT: spv.func @integer4 @@ -92,10 +83,7 @@ func @integer42(%arg0: i42) { return } // The index type is always converted into i32. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @index_type @@ -113,10 +101,7 @@ func @index_type(%arg0: index) { return } // Check that non-32-bit float types are converted to 32-bit types if the // corresponding capabilities are not available. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @float16 @@ -134,10 +119,7 @@ func @float64(%arg0: f64) { return } // Check that non-32-bit float types are kept untouched if the corresponding // capabilities are available. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @float16 @@ -154,10 +136,7 @@ func @float64(%arg0: f64) { return } // Check that bf16 is not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-NOT: spv.func @bf16_type @@ -174,10 +153,7 @@ func @bf16_type(%arg0: bf16) { return } // Check that capabilities for scalar types affects vector types too: no special // capabilities available means using turning element types to 32-bit. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @int_vector @@ -206,9 +182,7 @@ func @float_vector( // special capabilities means keep vector types untouched. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @int_vector @@ -235,10 +209,7 @@ func @float_vector( // Check that 1- or > 4-element vectors are not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-NOT: spv.func @one_element_vector @@ -258,9 +229,7 @@ func @large_vector(%arg0: vector<1024xi32>) { return } // Check memory spaces. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: func @memref_mem_space @@ -285,10 +254,7 @@ func @memref_mem_space( // Check that boolean memref is not supported at the moment. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @memref_type({{%.*}}: memref<3xi1>) @@ -304,10 +270,7 @@ func @memref_type(%arg0: memref<3xi1>) { // requires special capability and extension: convert them to 32-bit if not // satisfied. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_8bit_StorageBuffer @@ -352,9 +315,7 @@ func @memref_16bit_Output(%arg4: memref<16xf16, 10>) { return } module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_8bit_storage, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: spv.func @memref_8bit_PushConstant @@ -379,9 +340,7 @@ func @memref_16bit_PushConstant( module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_8bit_storage, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: spv.func @memref_8bit_StorageBuffer @@ -406,9 +365,7 @@ func @memref_16bit_StorageBuffer( module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_8bit_storage, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: spv.func @memref_8bit_Uniform @@ -432,9 +389,7 @@ func @memref_16bit_Uniform( // and extension is available. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_16bit_Input @@ -452,9 +407,7 @@ func @memref_16bit_Output(%arg4: memref<16xi16, 10>) { return } // Check that memref offset and strides affect the array size. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_offset_strides @@ -488,10 +441,7 @@ func @memref_offset_strides( // Dynamic shapes module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // Check that unranked shapes are not supported. @@ -512,10 +462,7 @@ func @dynamic_dim_memref(%arg0: memref<8x?xi32>, // Vector types module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @memref_vector @@ -539,10 +486,7 @@ func @dynamic_dim_memref_vector(%arg0: memref<8x?xvector<4xi32>>, // Vector types, check that sizes not available in SPIR-V are not transformed. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @memref_vector_wrong_size @@ -562,9 +506,7 @@ func @memref_vector_wrong_size( // Check that tensor element types are kept untouched with proper capabilities. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @int_tensor_types @@ -595,10 +537,7 @@ func @float_tensor_types( // Check that tensor element types are changed to 32-bit without capabilities. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @int_tensor_types @@ -629,10 +568,7 @@ func @float_tensor_types( // Check that dynamic shapes are not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @unranked_tensor diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir new file mode 100644 index 0000000000000..ec05e349897a7 --- /dev/null +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -0,0 +1,48 @@ +// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=1' | FileCheck %s --check-prefix=CMP32 +// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=0' | FileCheck %s --check-prefix=CMP64 + +// CMP32-LABEL: llvm.func @genbool_var_1d( +// CMP32-SAME: %[[A:.*]]: !llvm.i64) +// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : !llvm.vec<11 x i32> +// CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : !llvm.i64 to !llvm.i32 +// CMP32: %[[T2:.*]] = llvm.mlir.undef : !llvm.vec<11 x i32> +// CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : !llvm.i32] : !llvm.vec<11 x i32> +// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i32>, !llvm.vec<11 x i32> +// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : !llvm.vec<11 x i32> +// CMP32: llvm.return %[[T6]] : 
!llvm.vec<11 x i1> + +// CMP64-LABEL: llvm.func @genbool_var_1d( +// CMP64-SAME: %[[A:.*]]: !llvm.i64) +// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : !llvm.vec<11 x i64> +// CMP64: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<11 x i64> +// CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm.vec<11 x i64> +// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i64>, !llvm.vec<11 x i64> +// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : !llvm.vec<11 x i64> +// CMP64: llvm.return %[[T5]] : !llvm.vec<11 x i1> + +func @genbool_var_1d(%arg0: index) -> vector<11xi1> { + %0 = vector.create_mask %arg0 : vector<11xi1> + return %0 : vector<11xi1> +} + +// CMP32-LABEL: llvm.func @transfer_read_1d +// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : !llvm.vec<16 x i32> +// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i32> +// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i32> +// CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} +// CMP32: llvm.return %[[L]] : !llvm.vec<16 x float> + +// CMP64-LABEL: llvm.func @transfer_read_1d +// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : !llvm.vec<16 x i64> +// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i64> +// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i64> +// CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} +// CMP64: llvm.return %[[L]] : !llvm.vec<16 x float> + +func @transfer_read_1d(%A : memref, %i: index) -> vector<16xf32> { + %d = constant -1.0: f32 + %f = vector.transfer_read %A[%i], %d 
{permutation_map = affine_map<(d0) -> (d0)>} : memref, vector<16xf32> + return %f : vector<16xf32> +} diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index d35c7fa645b7f..e0800c2fd2272 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -749,10 +749,12 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK-SAME: (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : // CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : +// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. -// CHECK: %[[linearIndex:.*]] = llvm.mlir.constant( -// CHECK-SAME: dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : +// CHECK: %[[linearIndex:.*]] = llvm.mlir.constant(dense +// CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : // CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. @@ -770,8 +772,6 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : @@ -799,9 +799,9 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Create a vector with linear indices [ 0 .. 
vector_length - 1 ]. -// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant( -// CHECK-SAME: dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> +// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant(dense +// CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : +// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, @@ -832,6 +832,8 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) } // CHECK-LABEL: func @transfer_read_2d_to_1d // CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: !llvm.i64, %[[BASE_1:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm.vec<17 x float> +// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : +// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> @@ -847,8 +849,6 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) // Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. 
dim ] // Here we check we properly use %DIM[1] -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 5c2da799d861e..240925baf3d8c 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -19,7 +19,7 @@ func @materialize_read_1d() { // CHECK: %[[FILTERED1:.*]] = select // CHECK: {{.*}} = select // CHECK: %[[FILTERED2:.*]] = select - // CHECK-NEXT: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> + // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> } } return @@ -58,6 +58,7 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { %f0 = constant 0.0: f32 + // CHECK-DAG: %[[ALLOC:.*]] = alloca() : memref<5x4xvector<3xf32>> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index @@ -68,7 +69,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { - // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for 
%[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { @@ -97,13 +97,15 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: {{.*}} = select // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index // CHECK-NEXT: %[[L3:.*]] = select + // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // - // CHECK-NEXT: {{.*}} = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-NEXT: store {{.*}}, %[[ALLOC]][%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32> + // CHECK-DAG: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref + // CHECK-DAG: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } - // CHECK-NEXT: dealloc %[[ALLOC]] : memref<5x4x3xf32> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -134,6 +136,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { + // CHECK-DAG: %[[ALLOC:.*]] = alloca() : memref<5x4xvector<3xf32>> // CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index @@ -145,8 +148,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { - // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> - // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32> + // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : 
memref<5x4xvector<3xf32>> // CHECK: store %{{.*}}, {{.*}} : memref> // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { @@ -177,13 +179,14 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index // CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index + // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // - // CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32> - // CHECK: store {{.*}}, {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref + // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-NEXT: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } - // CHECK-NEXT: dealloc {{.*}} : memref<5x4x3xf32> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -232,7 +235,7 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<3x %f7 = constant 7.0: f32 // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32> - // CHECK-DAG: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>> + // CHECK-DAG: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref // CHECK: affine.for %[[I:.*]] = 0 to 3 { @@ -307,7 +310,7 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<3x // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32> func @transfer_write_progressive(%A : memref, %base: index, %vec: vector<3x15xf32>) { // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>> + 
// CHECK: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> // CHECK: store %[[vec]], %[[vmemref]][] : memref> // CHECK: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref @@ -363,7 +366,7 @@ func @transfer_write_progressive(%A : memref, %base: index, %vec: vecto // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32> func @transfer_write_progressive_unmasked(%A : memref, %base: index, %vec: vector<3x15xf32>) { // CHECK-NOT: scf.if - // CHECK-NEXT: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>> + // CHECK-NEXT: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref> // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 { @@ -416,7 +419,7 @@ func @transfer_read_minor_identity(%A : memref) -> vector<3x3xf32> // CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32 // CHECK: %[[c2:.*]] = constant 2 : index // CHECK: %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32> -// CHECK: %[[m:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<3xf32>> +// CHECK: %[[m:.*]] = alloca() : memref<3xvector<3xf32>> // CHECK: %[[d:.*]] = dim %[[A]], %[[c2]] : memref // CHECK: affine.for %[[arg1:.*]] = 0 to 3 { // CHECK: %[[cmp:.*]] = cmpi "slt", %[[arg1]], %[[d]] : index @@ -445,7 +448,7 @@ func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref) // CHECK: %[[c0:.*]] = constant 0 : index // CHECK: %[[c2:.*]] = constant 2 : index -// CHECK: %[[m:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<3xf32>> +// CHECK: %[[m:.*]] = alloca() : memref<3xvector<3xf32>> // CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref> // CHECK: store %[[A]], %[[cast]][] : memref> // CHECK: %[[d:.*]] = dim %[[B]], %[[c2]] : memref @@ -457,3 +460,28 @@ func @transfer_write_minor_identity(%A 
: vector<3x3xf32>, %B : memref (d0 + d1 * 8)>>) -> vector<4xf32> { + %c0 = constant 0 : index + %f0 = constant 0.0 : f32 + %0 = vector.transfer_read %A[%c0, %c0], %f0 + : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>, vector<4xf32> + return %0 : vector<4xf32> +} + +// CHECK-LABEL: transfer_read_strided( +// CHECK: scf.for +// CHECK: load + +func @transfer_write_strided(%A : vector<4xf32>, %B : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) { + %c0 = constant 0 : index + vector.transfer_write %A, %B[%c0, %c0] : + vector<4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>> + return +} + +// CHECK-LABEL: transfer_write_strided( +// CHECK: scf.for +// CHECK: store diff --git a/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir b/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir new file mode 100644 index 0000000000000..9d9f06177e3b3 --- /dev/null +++ b/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir @@ -0,0 +1,60 @@ +// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=128" -split-input-file | FileCheck %s + +// Specific tests to check vectorization of uniform/divergent values. 
+ +// CHECK-LABEL: @uniform_arg +// CHECK-SAME: %[[in:.*]]: memref<512xf32>, +// CHECK-SAME: %[[uniform:.*]]: f32 +func @uniform_arg(%in : memref<512xf32>, %uniform : f32) { + affine.for %i = 0 to 512 { + %ld = affine.load %in[%i] : memref<512xf32> + %add = addf %ld, %uniform : f32 + } + return +} + +// CHECK-NEXT: %[[bcast:.*]] = vector.broadcast %[[uniform]] : f32 to vector<128xf32> +// CHECK-NEXT: affine.for +// CHECK: addf %{{.*}}, %[[bcast]] : vector<128xf32> + +// ----- + +// CHECK-LABEL: @multi_use_uniform_arg +// CHECK-SAME: %[[in:.*]]: memref<512xf32> +// CHECK-SAME: %[[uniform:.*]]: f32 +func @multi_use_uniform_arg(%in : memref<512xf32>, %uniform : f32) { + affine.for %i = 0 to 512 { + %ld = affine.load %in[%i] : memref<512xf32> + %user0 = addf %ld, %uniform : f32 + %user1 = addf %ld, %uniform : f32 + } + return +} + +// CHECK-NEXT: %[[bcast:.*]] = vector.broadcast %[[uniform]] : f32 to vector<128xf32> +// CHECK-NOT: vector.broadcast +// CHECK-NEXT: affine.for +// CHECK: addf %{{.*}}, %[[bcast]] : vector<128xf32> +// CHECK: addf %{{.*}}, %[[bcast]] : vector<128xf32> + +// ----- + +// CHECK-LABEL: @uniform_load +func @uniform_load(%A : memref, %C : memref) { + %c0 = constant 0 : index + %N = dim %A, %c0 : memref + affine.for %i = 0 to %N { + %uniform_ld = affine.load %A[%i, %i] : memref + affine.for %j = 0 to %N { + %b = affine.load %A[%i, %j] : memref + %c = addf %uniform_ld, %b : f32 + } + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: %[[uniform_ld:.*]] = affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECK-NEXT: %[[bcast:.*]] = vector.broadcast %[[uniform_ld]] : f32 to vector<128xf32> +// CHECK-NEXT: affine.for +// CHECK: addf %[[bcast]], %{{.*}} : vector<128xf32> diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir index bbeced633bcd8..66429907205e9 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ 
b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -396,25 +396,6 @@ func @vec_rejected_10(%A : memref, %B : memref) { return } -// This should not vectorize and should not crash. -// CHECK-LABEL: @vec_rejected_11 -func @vec_rejected_11(%A : memref, %C : memref) { - %c0 = constant 0 : index - %N = dim %A, %c0 : memref - affine.for %i = 0 to %N { -// CHECK-NOT: vector - %a = affine.load %A[%i, %i] : memref // not vectorized - affine.for %j = 0 to %N { - %b = affine.load %A[%i, %j] : memref // may be vectorized -// CHECK-NOT: vector - %c = addf %a, %b : f32 // not vectorized because %a wasn't -// CHECK-NOT: vector - affine.store %c, %C[%i, %j] : memref // not vectorized because %c wasn't - } - } - return -} - // This should not vectorize due to the sequential dependence in the scf. // CHECK-LABEL: @vec_rejected_sequential func @vec_rejected_sequential(%A : memref) { diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir index 4c9c0dbbf774b..4256dcc0614bf 100644 --- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir @@ -22,6 +22,8 @@ func @nested_loops_both_having_invariant_code() { return } +// ----- + // The store-load forwarding can see through affine apply's since it relies on // dependence information. 
// CHECK-LABEL: func @store_affine_apply @@ -36,12 +38,14 @@ func @store_affine_apply() -> memref<10xf32> { // CHECK: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %0 = alloc() : memref<10xf32> // CHECK-NEXT: affine.for %arg0 = 0 to 10 { -// CHECK-NEXT: %1 = affine.apply #map3(%arg0) +// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%arg0) // CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return %0 : memref<10xf32> } +// ----- + func @nested_loops_code_invariant_to_both() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -61,6 +65,8 @@ func @nested_loops_code_invariant_to_both() { return } +// ----- + func @single_loop_nothing_invariant() { %m1 = alloc() : memref<10xf32> %m2 = alloc() : memref<10xf32> @@ -82,6 +88,8 @@ func @single_loop_nothing_invariant() { return } +// ----- + func @invariant_code_inside_affine_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -98,7 +106,7 @@ func @invariant_code_inside_affine_if() { // CHECK: %0 = alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { - // CHECK-NEXT: %1 = affine.apply #map3(%arg0) + // CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%arg0) // CHECK-NEXT: affine.if #set0(%arg0, %1) { // CHECK-NEXT: %2 = addf %cst, %cst : f32 // CHECK-NEXT: affine.store %2, %0[%arg0] : memref<10xf32> @@ -108,6 +116,7 @@ func @invariant_code_inside_affine_if() { return } +// ----- func @dependent_stores() { %m = alloc() : memref<10xf32> @@ -137,6 +146,8 @@ func @dependent_stores() { return } +// ----- + func @independent_stores() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -165,6 +176,8 @@ func @independent_stores() { return } +// ----- + func @load_dependent_store() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -192,6 +205,8 @@ func @load_dependent_store() { return } +// ----- + func @load_after_load() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -219,6 
+234,8 @@ func @load_after_load() { return } +// ----- + func @invariant_affine_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -244,6 +261,8 @@ func @invariant_affine_if() { return } +// ----- + func @invariant_affine_if2() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -271,6 +290,8 @@ func @invariant_affine_if2() { return } +// ----- + func @invariant_affine_nested_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -303,6 +324,8 @@ func @invariant_affine_nested_if() { return } +// ----- + func @invariant_affine_nested_if_else() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -339,6 +362,8 @@ func @invariant_affine_nested_if_else() { return } +// ----- + func @invariant_affine_nested_if_else2() { %m = alloc() : memref<10xf32> %m2 = alloc() : memref<10xf32> @@ -375,6 +400,7 @@ func @invariant_affine_nested_if_else2() { return } +// ----- func @invariant_affine_nested_if2() { %m = alloc() : memref<10xf32> @@ -406,6 +432,8 @@ func @invariant_affine_nested_if2() { return } +// ----- + func @invariant_affine_for_inside_affine_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -438,6 +466,7 @@ func @invariant_affine_for_inside_affine_if() { return } +// ----- func @invariant_constant_and_load() { %m = alloc() : memref<100xf32> @@ -459,6 +488,7 @@ func @invariant_constant_and_load() { return } +// ----- func @nested_load_store_same_memref() { %m = alloc() : memref<10xf32> @@ -483,6 +513,7 @@ func @nested_load_store_same_memref() { return } +// ----- func @nested_load_store_same_memref2() { %m = alloc() : memref<10xf32> @@ -505,3 +536,80 @@ func @nested_load_store_same_memref2() { return } + +// ----- + +// CHECK-LABEL: func @do_not_hoist_dependent_side_effect_free_op +func @do_not_hoist_dependent_side_effect_free_op(%arg0: memref<10x512xf32>) { + %0 = alloca() : memref<1xf32> + %cst = constant 8.0 : f32 + affine.for %i = 0 to 512 { + affine.for %j = 0 to 10 { + %5 = affine.load %arg0[%i, %j] : 
memref<10x512xf32> + %6 = affine.load %0[0] : memref<1xf32> + %add = addf %5, %6 : f32 + affine.store %add, %0[0] : memref<1xf32> + } + %3 = affine.load %0[0] : memref<1xf32> + %4 = mulf %3, %cst : f32 // It shouldn't be hoisted. + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: affine.for +// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.load +// CHECK-NEXT: addf +// CHECK-NEXT: affine.store +// CHECK-NEXT: } +// CHECK-NEXT: affine.load +// CHECK-NEXT: mulf +// CHECK-NEXT: } + +// ----- + +// CHECK-LABEL: func @vector_loop_nothing_invariant +func @vector_loop_nothing_invariant() { + %m1 = alloc() : memref<40xf32> + %m2 = alloc() : memref<40xf32> + affine.for %arg0 = 0 to 10 { + %v0 = affine.vector_load %m1[%arg0*4] : memref<40xf32>, vector<4xf32> + %v1 = affine.vector_load %m2[%arg0*4] : memref<40xf32>, vector<4xf32> + %v2 = addf %v0, %v1 : vector<4xf32> + affine.vector_store %v2, %m1[%arg0*4] : memref<40xf32>, vector<4xf32> + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: addf +// CHECK-NEXT: affine.vector_store +// CHECK-NEXT: } + +// ----- + +// CHECK-LABEL: func @vector_loop_all_invariant +func @vector_loop_all_invariant() { + %m1 = alloc() : memref<4xf32> + %m2 = alloc() : memref<4xf32> + %m3 = alloc() : memref<4xf32> + affine.for %arg0 = 0 to 10 { + %v0 = affine.vector_load %m1[0] : memref<4xf32>, vector<4xf32> + %v1 = affine.vector_load %m2[0] : memref<4xf32>, vector<4xf32> + %v2 = addf %v0, %v1 : vector<4xf32> + affine.vector_store %v2, %m3[0] : memref<4xf32>, vector<4xf32> + } + return +} + +// CHECK: alloc() +// CHECK-NEXT: alloc() +// CHECK-NEXT: alloc() +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: addf +// CHECK-NEXT: affine.vector_store +// CHECK-NEXT: affine.for diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 1f8b1600873c9..c19795e98b686 100644 --- 
a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -394,7 +394,7 @@ func @nvvm_invalid_mma_7(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, // CHECK-LABEL: @atomicrmw_expected_ptr func @atomicrmw_expected_ptr(%f32 : !llvm.float) { - // expected-error@+1 {{expected LLVM IR pointer type for operand #0}} + // expected-error@+1 {{operand #0 must be LLVM pointer to floating point LLVM type or LLVM integer type}} %0 = "llvm.atomicrmw"(%f32, %f32) {bin_op=11, ordering=1} : (!llvm.float, !llvm.float) -> !llvm.float llvm.return } @@ -448,7 +448,7 @@ func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : !llvm.float) { // CHECK-LABEL: @cmpxchg_expected_ptr func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr, %f32 : !llvm.float) { - // expected-error@+1 {{expected LLVM IR pointer type for operand #0}} + // expected-error@+1 {{op operand #0 must be LLVM pointer to LLVM integer type or LLVM pointer type}} %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (!llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, i1)> llvm.return } diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir index a08a2f1e585c6..3b76f8a3139c1 100644 --- a/mlir/test/Dialect/Linalg/tile_conv.mlir +++ b/mlir/test/Dialect/Linalg/tile_conv.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 // TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)> -// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 90, d0 * -30 + s1)> +// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s1)> // TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-23004-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> diff --git 
a/mlir/test/Dialect/Linalg/tile_simple_conv.mlir b/mlir/test/Dialect/Linalg/tile_simple_conv.mlir index f854f7570fef3..b71f4bc0d3a8f 100644 --- a/mlir/test/Dialect/Linalg/tile_simple_conv.mlir +++ b/mlir/test/Dialect/Linalg/tile_simple_conv.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s1)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 4, -d0 + s1)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s1)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s1)> // CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> // CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> @@ -46,4 +46,4 @@ func @conv(%arg0 : memref, %arg1 : memref, %arg2 : mem // CHECK: %[[T19:.*]] = dim %[[ARG2]], %[[C3]] // CHECK: %[[SV2:.*]] = subview %[[ARG2]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0] // CHECK-SAME: [%[[T14]], %[[T16]], %[[T18]], %[[T19]]] -// CHECK: linalg.conv(%[[ARG0]], %[[SV1]], %[[SV2]]) \ No newline at end of file +// CHECK: linalg.conv(%[[ARG0]], %[[SV1]], %[[SV2]]) diff --git a/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir index b04195387f12e..223b6301207da 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir @@ -14,6 +14,20 @@ spv.module Logical GLSL450 requires #spv.vce { %4 = spv.GLSL.Sin %arg0 : f32 // CHECK: {{%.*}} = spv.GLSL.Tan {{%.*}} : f32 %5 = spv.GLSL.Tan %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Acos {{%.*}} : f32 + %6 = spv.GLSL.Acos %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Asin {{%.*}} : f32 + %7 = spv.GLSL.Asin %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Atan {{%.*}} : f32 + %8 = spv.GLSL.Atan %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Sinh {{%.*}} : f32 + %9 = 
spv.GLSL.Sinh %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Cosh {{%.*}} : f32 + %10 = spv.GLSL.Cosh %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Pow {{%.*}} : f32 + %11 = spv.GLSL.Pow %arg0, %arg1 : f32 + // CHECK: {{%.*}} = spv.GLSL.Round {{%.*}} : f32 + %12 = spv.GLSL.Round %arg0 : f32 spv.Return } } diff --git a/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir index 9e1e851918749..b3aaf63856a5d 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir @@ -19,4 +19,28 @@ spv.module Logical GLSL450 requires #spv.vce { %0 = spv.GroupBroadcast "Workgroup" %value, %localid : f32, vector<3xi32> spv.ReturnValue %0: f32 } + // CHECK-LABEL: @subgroup_block_read_intel + spv.func @subgroup_block_read_intel(%ptr : !spv.ptr) -> i32 "None" { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : i32 + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : i32 + spv.ReturnValue %0: i32 + } + // CHECK-LABEL: @subgroup_block_read_intel_vector + spv.func @subgroup_block_read_intel_vector(%ptr : !spv.ptr) -> vector<3xi32> "None" { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : vector<3xi32> + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : vector<3xi32> + spv.ReturnValue %0: vector<3xi32> + } + // CHECK-LABEL: @subgroup_block_write_intel + spv.func @subgroup_block_write_intel(%ptr : !spv.ptr, %value: i32) -> () "None" { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : i32 + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : i32 + spv.Return + } + // CHECK-LABEL: @subgroup_block_write_intel_vector + spv.func @subgroup_block_write_intel_vector(%ptr : !spv.ptr, %value: vector<3xi32>) -> () "None" { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : vector<3xi32> + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : vector<3xi32> + spv.Return + } } diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir 
b/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir index 54b810f43aec3..1de6b71d888d1 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir @@ -1,10 +1,7 @@ // RUN: mlir-opt -spirv-lower-abi-attrs -verify-diagnostics %s -o - | FileCheck %s module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { spv.module Physical64 OpenCL { // CHECK-LABEL: spv.module diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir index 28c44bf7b936d..5b06745eba874 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir @@ -2,9 +2,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.module diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir index 3d37f35b1c466..7d1a174fa3671 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir @@ -2,9 +2,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.module diff --git a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir index 572db88e5f9ec..74484fd7ab6b1 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir 
@@ -10,9 +10,7 @@ // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -26,9 +24,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @group_non_uniform_ballot(%predicate : i1) -> vector<4xi32> "None" { %0 = spv.GroupNonUniformBallot "Workgroup" %predicate : vector<4xi32> @@ -45,9 +41,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -61,9 +55,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -84,9 +76,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @group_non_uniform_iadd(%val : i32) -> i32 "None" { %0 = spv.GroupNonUniformIAdd "Subgroup" 
"Reduce" %val : i32 @@ -97,9 +87,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @group_non_uniform_iadd(%val : i32) -> i32 "None" { %0 = spv.GroupNonUniformIAdd "Subgroup" "Reduce" %val : i32 @@ -113,9 +101,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd_function(%val : i8) -> i8 "None" { %0 = spv.IAdd %val, %val : i8 @@ -127,9 +113,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @fadd_function(%val : f16) -> f16 "None" { %0 = spv.FAdd %val, %val : f16 @@ -148,9 +132,7 @@ spv.module Logical GLSL450 attributes { spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_shader_ballot, SPV_KHR_shader_clock, SPV_KHR_variable_pointers]>, {}> } { spv.func @subgroup_ballot(%predicate : i1) -> vector<4xi32> "None" { %0 = spv.SubgroupBallotKHR %predicate: vector<4xi32> @@ -165,9 +147,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical Vulkan attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : 
vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -182,9 +162,7 @@ spv.module Logical Vulkan attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd_storage_buffer(%ptr : !spv.ptr) -> i16 "None" { %0 = spv.Load "StorageBuffer" %ptr : i16 @@ -200,8 +178,7 @@ spv.module Logical GLSL450 attributes { spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + {}> } { spv.globalVariable @data : !spv.ptr, Uniform> spv.globalVariable @img : !spv.ptr, UniformConstant> diff --git a/mlir/test/Dialect/SPIRV/glslops.mlir b/mlir/test/Dialect/SPIRV/glslops.mlir index 1e7b18ef71ffb..3e699ed05958c 100644 --- a/mlir/test/Dialect/SPIRV/glslops.mlir +++ b/mlir/test/Dialect/SPIRV/glslops.mlir @@ -155,3 +155,117 @@ func @tanvec(%arg0 : vector<3xf16>) -> () { %2 = spv.GLSL.Tan %arg0 : vector<3xf16> return } + +//===----------------------------------------------------------------------===// +// spv.GLSL.Acos +//===----------------------------------------------------------------------===// + +func @acos(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Acos {{%.*}} : f32 + %2 = spv.GLSL.Acos %arg0 : f32 + return +} + +func @acosvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Acos {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Acos %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Asin +//===----------------------------------------------------------------------===// + +func @asin(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Asin {{%.*}} : f32 + %2 = spv.GLSL.Asin %arg0 : f32 + return +} + 
+func @asinvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Asin {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Asin %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Atan +//===----------------------------------------------------------------------===// + +func @atan(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Atan {{%.*}} : f32 + %2 = spv.GLSL.Atan %arg0 : f32 + return +} + +func @atanvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Atan {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Atan %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Sinh +//===----------------------------------------------------------------------===// + +func @sinh(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Sinh {{%.*}} : f32 + %2 = spv.GLSL.Sinh %arg0 : f32 + return +} + +func @sinhvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Sinh {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Sinh %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Cosh +//===----------------------------------------------------------------------===// + +func @cosh(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Cosh {{%.*}} : f32 + %2 = spv.GLSL.Cosh %arg0 : f32 + return +} + +func @coshvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Cosh {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Cosh %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Pow +//===----------------------------------------------------------------------===// + +func @pow(%arg0 : f32, %arg1 : f32) -> () { + // CHECK: spv.GLSL.Pow {{%.*}}, {{%.*}} : f32 + %2 = spv.GLSL.Pow %arg0, %arg1 : f32 + return +} + +func @powvec(%arg0 : vector<3xf16>, %arg1 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Pow {{%.*}}, {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Pow 
%arg0, %arg1 : vector<3xf16> + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.GLSL.Round +//===----------------------------------------------------------------------===// + +func @round(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Round {{%.*}} : f32 + %2 = spv.GLSL.Round %arg0 : f32 + return +} + +func @roundvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Round {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Round %arg0 : vector<3xf16> + return +} diff --git a/mlir/test/Dialect/SPIRV/group-ops.mlir b/mlir/test/Dialect/SPIRV/group-ops.mlir index 93e9054050ecc..55a07270a348f 100644 --- a/mlir/test/Dialect/SPIRV/group-ops.mlir +++ b/mlir/test/Dialect/SPIRV/group-ops.mlir @@ -61,3 +61,43 @@ func @group_broadcast_negative_locid_vec4(%value: f32, %localid: vector<4xi32> ) %0 = spv.GroupBroadcast "Subgroup" %value, %localid : f32, vector<4xi32> return %0: f32 } + +// ----- + +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockReadINTEL +//===----------------------------------------------------------------------===// + +func @subgroup_block_read_intel(%ptr : !spv.ptr) -> i32 { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : i32 + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : i32 + return %0: i32 +} + +// ----- + +func @subgroup_block_read_intel_vector(%ptr : !spv.ptr) -> vector<3xi32> { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : vector<3xi32> + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : vector<3xi32> + return %0: vector<3xi32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockWriteINTEL +//===----------------------------------------------------------------------===// + +func @subgroup_block_write_intel(%ptr : !spv.ptr, %value: i32) -> () { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : i32 + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : 
i32 + return +} + +// ----- + +func @subgroup_block_write_intel_vector(%ptr : !spv.ptr, %value: vector<3xi32>) -> () { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : vector<3xi32> + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : vector<3xi32> + return +} \ No newline at end of file diff --git a/mlir/test/Dialect/SPIRV/target-and-abi.mlir b/mlir/test/Dialect/SPIRV/target-and-abi.mlir index 8d11f4ca0c642..cd338752600ab 100644 --- a/mlir/test/Dialect/SPIRV/target-and-abi.mlir +++ b/mlir/test/Dialect/SPIRV/target-and-abi.mlir @@ -104,15 +104,6 @@ func @interface_var( // spv.target_env //===----------------------------------------------------------------------===// -func @target_env_missing_limits() attributes { - spv.target_env = #spv.target_env< - #spv.vce, - // expected-error @+1 {{limits must be a dictionary attribute containing two 32-bit integer attributes 'max_compute_workgroup_invocations' and 'max_compute_workgroup_size'}} - {max_compute_workgroup_size = dense<[128, 64, 64]> : vector<3xi32>}> -} { return } - -// ----- - func @target_env_wrong_limits() attributes { spv.target_env = #spv.target_env< #spv.vce, diff --git a/mlir/test/Dialect/SPIRV/target-env.mlir b/mlir/test/Dialect/SPIRV/target-env.mlir index 27c4e8d04092b..c0bc02fae0894 100644 --- a/mlir/test/Dialect/SPIRV/target-env.mlir +++ b/mlir/test/Dialect/SPIRV/target-env.mlir @@ -35,7 +35,7 @@ // CHECK-LABEL: @cmp_exchange_weak_suitable_version_capabilities func @cmp_exchange_weak_suitable_version_capabilities(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.AtomicCompareExchangeWeak "Workgroup" "AcquireRelease|AtomicCounterMemory" "Acquire" %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, 
i32) -> (i32) @@ -44,7 +44,7 @@ func @cmp_exchange_weak_suitable_version_capabilities(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_atomic_compare_exchange_weak_op %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -57,7 +57,7 @@ func @cmp_exchange_weak_unsupported_version(%ptr: !spv.ptr, %val // CHECK-LABEL: @group_non_uniform_ballot_suitable_version func @group_non_uniform_ballot_suitable_version(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.GroupNonUniformBallot "Workgroup" %0 = "test.convert_to_group_non_uniform_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -66,7 +66,7 @@ func @group_non_uniform_ballot_suitable_version(%predicate: i1) -> vector<4xi32> // CHECK-LABEL: @group_non_uniform_ballot_unsupported_version func @group_non_uniform_ballot_unsupported_version(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_group_non_uniform_ballot_op %0 = "test.convert_to_group_non_uniform_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -79,7 +79,7 @@ func @group_non_uniform_ballot_unsupported_version(%predicate: i1) -> vector<4xi // CHECK-LABEL: @cmp_exchange_weak_missing_capability_kernel func @cmp_exchange_weak_missing_capability_kernel(%ptr: !spv.ptr, %value: i32, %comparator: i32) 
-> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_atomic_compare_exchange_weak_op %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -88,7 +88,7 @@ func @cmp_exchange_weak_missing_capability_kernel(%ptr: !spv.ptr // CHECK-LABEL: @cmp_exchange_weak_missing_capability_atomic_storage func @cmp_exchange_weak_missing_capability_atomic_storage(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_atomic_compare_exchange_weak_op %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -97,7 +97,7 @@ func @cmp_exchange_weak_missing_capability_atomic_storage(%ptr: !spv.ptr vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_subgroup_ballot_op %0 = "test.convert_to_subgroup_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -106,7 +106,7 @@ func @subgroup_ballot_missing_capability(%predicate: i1) -> vector<4xi32> attrib // CHECK-LABEL: @bit_reverse_directly_implied_capability func @bit_reverse_directly_implied_capability(%operand: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: 
spv.BitReverse %0 = "test.convert_to_bit_reverse_op"(%operand): (i32) -> (i32) @@ -115,7 +115,7 @@ func @bit_reverse_directly_implied_capability(%operand: i32) -> i32 attributes { // CHECK-LABEL: @bit_reverse_recursively_implied_capability func @bit_reverse_recursively_implied_capability(%operand: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.BitReverse %0 = "test.convert_to_bit_reverse_op"(%operand): (i32) -> (i32) @@ -128,7 +128,7 @@ func @bit_reverse_recursively_implied_capability(%operand: i32) -> i32 attribute // CHECK-LABEL: @subgroup_ballot_suitable_extension func @subgroup_ballot_suitable_extension(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.SubgroupBallotKHR %0 = "test.convert_to_subgroup_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -137,7 +137,7 @@ func @subgroup_ballot_suitable_extension(%predicate: i1) -> vector<4xi32> attrib // CHECK-LABEL: @subgroup_ballot_missing_extension func @subgroup_ballot_missing_extension(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_subgroup_ballot_op %0 = "test.convert_to_subgroup_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -146,7 +146,7 @@ func @subgroup_ballot_missing_extension(%predicate: i1) -> vector<4xi32> attribu // CHECK-LABEL: @module_suitable_extension1 func @module_suitable_extension1() attributes { - spv.target_env = 
#spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.module PhysicalStorageBuffer64 Vulkan "test.convert_to_module_op"() : () ->() @@ -155,7 +155,7 @@ func @module_suitable_extension1() attributes { // CHECK-LABEL: @module_suitable_extension2 func @module_suitable_extension2() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.module PhysicalStorageBuffer64 Vulkan "test.convert_to_module_op"() : () -> () @@ -164,7 +164,7 @@ func @module_suitable_extension2() attributes { // CHECK-LABEL: @module_missing_extension_mm func @module_missing_extension_mm() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_module_op "test.convert_to_module_op"() : () -> () @@ -173,7 +173,7 @@ func @module_missing_extension_mm() attributes { // CHECK-LABEL: @module_missing_extension_am func @module_missing_extension_am() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_module_op "test.convert_to_module_op"() : () -> () @@ -183,7 +183,7 @@ func @module_missing_extension_am() attributes { // CHECK-LABEL: @module_implied_extension func @module_implied_extension() attributes { // Version 1.5 implies SPV_KHR_vulkan_memory_model and SPV_KHR_physical_storage_buffer. 
- spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.module PhysicalStorageBuffer64 Vulkan "test.convert_to_module_op"() : () -> () diff --git a/mlir/test/Dialect/Standard/invalid.mlir b/mlir/test/Dialect/Standard/invalid.mlir index f2b71f634cd3d..7f9c564e74f3f 100644 --- a/mlir/test/Dialect/Standard/invalid.mlir +++ b/mlir/test/Dialect/Standard/invalid.mlir @@ -15,3 +15,69 @@ func @test_index_cast_tensor_error(%arg0 : tensor) -> i64 { %0 = index_cast %arg0 : tensor to i64 return %0 : i64 } + +// ----- + +func @dynamic_tensor_from_elements(%m : index) + -> tensor { + // expected-error @+1 {{must have as many index operands as dynamic extents in the result type}} + %tnsr = dynamic_tensor_from_elements %m { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+1 {{must have one body argument per input dimension}} + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+1 {{all body arguments must be index}} + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : i64): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+2 {{op expects regions to end with 'std.yield', found 'std.return'}} + // expected-note @+1 {{in custom textual format, the absence of terminator implies 'std.yield'}} + %tnsr = 
dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8.0 : f32 + return %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+1 {{body must be terminated with a `yield` operation of the tensor element type}} + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8 : i32 + yield %elem : i32 + } : tensor + return %tnsr : tensor +} diff --git a/mlir/test/Dialect/Standard/ops.mlir b/mlir/test/Dialect/Standard/ops.mlir index 24da04eebaaa6..a765acb9657b5 100644 --- a/mlir/test/Dialect/Standard/ops.mlir +++ b/mlir/test/Dialect/Standard/ops.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-opt -split-input-file %s | FileCheck %s +// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s --mlir-print-op-generic | mlir-opt | FileCheck %s // CHECK-LABEL: test_index_cast func @test_index_cast(%arg0 : index) -> i64 { @@ -22,3 +23,14 @@ func @assert(%arg : i1) { assert %arg, "Some message in case this assertion fails." 
return } + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index e34e3428c185e..aaaa7adf6472c 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -785,43 +785,63 @@ func @genbool_3d() -> vector<2x3x4xi1> { return %v: vector<2x3x4xi1> } -// CHECK-LABEL: func @genbool_var_1d -// CHECK-SAME: %[[A:.*]]: index -// CHECK: %[[C1:.*]] = constant dense<[0, 1, 2]> : vector<3xi64> -// CHECK: %[[T0:.*]] = index_cast %[[A]] : index to i64 -// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<3xi64> -// CHECK: %[[T2:.*]] = cmpi "slt", %[[C1]], %[[T1]] : vector<3xi64> -// CHECK: return %[[T2]] : vector<3xi1> +// CHECK-LABEL: func @genbool_var_1d( +// CHECK-SAME: %[[A:.*]]: index) +// CHECK: %[[T0:.*]] = vector.create_mask %[[A]] : vector<3xi1> +// CHECK: return %[[T0]] : vector<3xi1> func @genbool_var_1d(%arg0: index) -> vector<3xi1> { %0 = vector.create_mask %arg0 : vector<3xi1> return %0 : vector<3xi1> } -// CHECK-LABEL: func @genbool_var_2d -// CHECK-SAME: %[[A:.*0]]: index -// CHECK-SAME: %[[B:.*1]]: index -// CHECK: %[[CI:.*]] = constant dense<[0, 1, 2]> : vector<3xi64> -// CHECK: %[[CF:.*]] = constant dense : vector<3xi1> +// CHECK-LABEL: func @genbool_var_2d( +// CHECK-SAME: %[[A:.*0]]: index, +// CHECK-SAME: %[[B:.*1]]: index) +// CHECK: %[[C1:.*]] = constant dense : vector<3xi1> // CHECK: %[[C2:.*]] = constant dense : vector<2x3xi1> // CHECK: %[[c0:.*]] = constant 0 : index // CHECK: %[[c1:.*]] = constant 1 : index -// CHECK: %[[T0:.*]] = index_cast %[[B]] : index to i64 -// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<3xi64> -// CHECK: %[[T2:.*]] = cmpi "slt", 
%[[CI]], %[[T1]] : vector<3xi64> -// CHECK: %[[T3:.*]] = cmpi "slt", %[[c0]], %[[A]] : index -// CHECK: %[[T4:.*]] = select %[[T3]], %[[T2]], %[[CF]] : vector<3xi1> -// CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[C2]] [0] : vector<3xi1> into vector<2x3xi1> -// CHECK: %[[T6:.*]] = cmpi "slt", %[[c1]], %[[A]] : index -// CHECK: %[[T7:.*]] = select %[[T6]], %[[T2]], %[[CF]] : vector<3xi1> -// CHECK: %[[T8:.*]] = vector.insert %[[T7]], %[[T5]] [1] : vector<3xi1> into vector<2x3xi1> -// CHECK: return %[[T8]] : vector<2x3xi1> +// CHECK: %[[T0:.*]] = vector.create_mask %[[B]] : vector<3xi1> +// CHECK: %[[T1:.*]] = cmpi "slt", %[[c0]], %[[A]] : index +// CHECK: %[[T2:.*]] = select %[[T1]], %[[T0]], %[[C1]] : vector<3xi1> +// CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C2]] [0] : vector<3xi1> into vector<2x3xi1> +// CHECK: %[[T4:.*]] = cmpi "slt", %[[c1]], %[[A]] : index +// CHECK: %[[T5:.*]] = select %[[T4]], %[[T0]], %[[C1]] : vector<3xi1> +// CHECK: %[[T6:.*]] = vector.insert %[[T5]], %[[T3]] [1] : vector<3xi1> into vector<2x3xi1> +// CHECK: return %[[T6]] : vector<2x3xi1> func @genbool_var_2d(%arg0: index, %arg1: index) -> vector<2x3xi1> { %0 = vector.create_mask %arg0, %arg1 : vector<2x3xi1> return %0 : vector<2x3xi1> } +// CHECK-LABEL: func @genbool_var_3d( +// CHECK-SAME: %[[A:.*0]]: index, +// CHECK-SAME: %[[B:.*1]]: index, +// CHECK-SAME: %[[C:.*2]]: index) +// CHECK: %[[C1:.*]] = constant dense : vector<7xi1> +// CHECK: %[[C2:.*]] = constant dense : vector<1x7xi1> +// CHECK: %[[C3:.*]] = constant dense : vector<2x1x7xi1> +// CHECK: %[[c0:.*]] = constant 0 : index +// CHECK: %[[c1:.*]] = constant 1 : index +// CHECK: %[[T0:.*]] = vector.create_mask %[[C]] : vector<7xi1> +// CHECK: %[[T1:.*]] = cmpi "slt", %[[c0]], %[[B]] : index +// CHECK: %[[T2:.*]] = select %[[T1]], %[[T0]], %[[C1]] : vector<7xi1> +// CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C2]] [0] : vector<7xi1> into vector<1x7xi1> +// CHECK: %[[T4:.*]] = cmpi "slt", %[[c0]], %[[A]] : index +// CHECK: 
%[[T5:.*]] = select %[[T4]], %[[T3]], %[[C2]] : vector<1x7xi1> +// CHECK: %[[T6:.*]] = vector.insert %[[T5]], %[[C3]] [0] : vector<1x7xi1> into vector<2x1x7xi1> +// CHECK: %[[T7:.*]] = cmpi "slt", %[[c1]], %[[A]] : index +// CHECK: %[[T8:.*]] = select %[[T7]], %[[T3]], %[[C2]] : vector<1x7xi1> +// CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T6]] [1] : vector<1x7xi1> into vector<2x1x7xi1> +// CHECK: return %[[T9]] : vector<2x1x7xi1> + +func @genbool_var_3d(%arg0: index, %arg1: index, %arg2: index) -> vector<2x1x7xi1> { + %0 = vector.create_mask %arg0, %arg1, %arg2 : vector<2x1x7xi1> + return %0 : vector<2x1x7xi1> +} + #matmat_accesses_0 = [ affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index 062e4b5912297..4695090dacb52 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -1089,7 +1089,7 @@ TEST_FUNC(vector_extractelement_op_i32) { ScopedContext scope(builder, f.getLoc()); auto i32Type = builder.getI32Type(); auto vectorType = VectorType::get(/*shape=*/{8}, i32Type); - vector_extractelement( + vector_extract_element( i32Type, std_constant(vectorType, builder.getI32VectorAttr({10})), std_constant_int(0, i32Type)); diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir index 73622e37ade57..6cb7cdec3442e 100644 --- a/mlir/test/mlir-vulkan-runner/addf.mlir +++ b/mlir/test/mlir-vulkan-runner/addf.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>) diff --git a/mlir/test/mlir-vulkan-runner/addi.mlir b/mlir/test/mlir-vulkan-runner/addi.mlir index c690120718b2e..696c5015565db 100644 
--- a/mlir/test/mlir-vulkan-runner/addi.mlir +++ b/mlir/test/mlir-vulkan-runner/addi.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_addi(%arg0 : memref<8xi32>, %arg1 : memref<8x8xi32>, %arg2 : memref<8x8x8xi32>) diff --git a/mlir/test/mlir-vulkan-runner/addi8.mlir b/mlir/test/mlir-vulkan-runner/addi8.mlir index 094186d5731d3..eeb5222856961 100644 --- a/mlir/test/mlir-vulkan-runner/addi8.mlir +++ b/mlir/test/mlir-vulkan-runner/addi8.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_addi(%arg0 : memref<8xi8>, %arg1 : memref<8x8xi8>, %arg2 : memref<8x8x8xi32>) diff --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/mlir-vulkan-runner/mulf.mlir index be0bd5afb4252..0abcb53ebfe6b 100644 --- a/mlir/test/mlir-vulkan-runner/mulf.mlir +++ b/mlir/test/mlir-vulkan-runner/mulf.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_mul(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>) diff --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/mlir-vulkan-runner/subf.mlir index 5fc7e0a91d29b..77c1f8841e8be 100644 --- a/mlir/test/mlir-vulkan-runner/subf.mlir +++ b/mlir/test/mlir-vulkan-runner/subf.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations 
= 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_sub(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>) diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir index 9a96d7f819fde..21b4b76d1df08 100644 --- a/mlir/test/mlir-vulkan-runner/time.mlir +++ b/mlir/test/mlir-vulkan-runner/time.mlir @@ -7,9 +7,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_add(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>) diff --git a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt index 3736a18b20091..bc9a0c1f310a1 100644 --- a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt +++ b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt @@ -7,6 +7,6 @@ add_llvm_tool(mlir-linalg-ods-gen ) llvm_update_compile_flags(mlir-linalg-ods-gen) target_link_libraries(mlir-linalg-ods-gen PRIVATE - MLIRParser MLIRSupport + MLIRIR ) diff --git a/mlir/tools/mlir-tblgen/StructsGen.cpp b/mlir/tools/mlir-tblgen/StructsGen.cpp index cccacc0cad853..2606dfe3696bd 100644 --- a/mlir/tools/mlir-tblgen/StructsGen.cpp +++ b/mlir/tools/mlir-tblgen/StructsGen.cpp @@ -143,7 +143,7 @@ static void emitFactoryDef(llvm::StringRef structName, )"; for (auto field : fields) { - if (field.getType().isOptional()) + if (field.getType().isOptional() || field.getType().hasDefaultValue()) os << llvm::formatv(getFieldInfoOptional, field.getName()); else os << llvm::formatv(getFieldInfo, field.getName()); @@ -169,7 +169,7 @@ bool {0}::classof(::mlir::Attribute attr))"; auto derived = attr.dyn_cast<::mlir::DictionaryAttr>(); if (!derived) return false; - int empty_optionals = 0; + int 
num_absent_attrs = 0; )"; os << llvm::formatv(classofInfo, structName) << " {"; @@ -184,7 +184,7 @@ bool {0}::classof(::mlir::Attribute attr))"; const char *classofArgInfoOptional = R"( auto {0} = derived.get("{0}"); if (!{0}) - ++empty_optionals; + ++num_absent_attrs; else if (!({1})) return false; )"; @@ -193,14 +193,14 @@ bool {0}::classof(::mlir::Attribute attr))"; auto type = field.getType(); std::string condition = std::string(tgfmt(type.getConditionTemplate(), &fctx.withSelf(name))); - if (type.isOptional()) + if (type.isOptional() || type.hasDefaultValue()) os << llvm::formatv(classofArgInfoOptional, name, condition); else os << llvm::formatv(classofArgInfo, name, condition); } const char *classofEndInfo = R"( - return derived.size() + empty_optionals == {0}; + return derived.size() + num_absent_attrs == {0}; } )"; os << llvm::formatv(classofEndInfo, fields.size()); @@ -229,14 +229,35 @@ emitAccessorDef(llvm::StringRef structName, return {1}.cast<{0}>(); } )"; + const char *fieldInfoDefaultValued = R"( +{0} {2}::{1}() const { + auto derived = this->cast<::mlir::DictionaryAttr>(); + auto {1} = derived.get("{1}"); + if (!{1}) { + ::mlir::Builder builder(getContext()); + return {3}; + } + assert({1}.isa<{0}>() && "incorrect Attribute type found."); + return {1}.cast<{0}>(); +} +)"; + FmtContext fmtCtx; + fmtCtx.withBuilder("builder"); + for (auto field : fields) { auto name = field.getName(); auto type = field.getType(); auto storage = type.getStorageType(); - if (type.isOptional()) + if (type.isOptional()) { os << llvm::formatv(fieldInfoOptional, storage, name, structName); - else + } else if (type.hasDefaultValue()) { + std::string defaultValue = tgfmt(type.getConstBuilderTemplate(), &fmtCtx, + type.getDefaultValue()); + os << llvm::formatv(fieldInfoDefaultValued, storage, name, structName, + defaultValue); + } else { os << llvm::formatv(fieldInfo, storage, name, structName); + } } } diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp 
b/mlir/tools/mlir-translate/mlir-translate.cpp index cf84856ddb849..06ca10f51b9b7 100644 --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -11,13 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "mlir/InitAllDialects.h" #include "mlir/InitAllTranslations.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Translation.h" using namespace mlir; - namespace mlir { // Defined in the test directory, no public header. void registerTestRoundtripSPIRV(); diff --git a/mlir/unittests/Pass/PassManagerTest.cpp b/mlir/unittests/Pass/PassManagerTest.cpp index 29086a2994e8b..99d4972ef63c0 100644 --- a/mlir/unittests/Pass/PassManagerTest.cpp +++ b/mlir/unittests/Pass/PassManagerTest.cpp @@ -74,4 +74,47 @@ TEST(PassManagerTest, OpSpecificAnalysis) { } } +namespace { +struct InvalidPass : Pass { + InvalidPass() : Pass(TypeID::get(), StringRef("invalid_op")) {} + StringRef getName() const override { return "Invalid Pass"; } + void runOnOperation() override {} + + /// A clone method to create a copy of this pass. + std::unique_ptr clonePass() const override { + return std::make_unique( + *static_cast(this)); + } +}; +} // anonymous namespace + +TEST(PassManagerTest, InvalidPass) { + MLIRContext context; + + // Create a module + OwningModuleRef module(ModuleOp::create(UnknownLoc::get(&context))); + + // Add a single "invalid_op" operation + OpBuilder builder(&module->getBodyRegion()); + OperationState state(UnknownLoc::get(&context), "invalid_op"); + builder.insert(Operation::create(state)); + + // Register a diagnostic handler to capture the diagnostic so that we can + // check it later. + std::unique_ptr diagnostic; + context.getDiagEngine().registerHandler([&](Diagnostic &diag) { + diagnostic.reset(new Diagnostic(std::move(diag))); + }); + + // Instantiate and run our pass. 
+ PassManager pm(&context); + pm.addPass(std::make_unique()); + LogicalResult result = pm.run(module.get()); + EXPECT_TRUE(failed(result)); + ASSERT_TRUE(diagnostic.get() != nullptr); + EXPECT_EQ( + diagnostic->str(), + "'invalid_op' op trying to schedule a pass on an unregistered operation"); +} + } // end namespace diff --git a/mlir/unittests/TableGen/StructsGenTest.cpp b/mlir/unittests/TableGen/StructsGenTest.cpp index 14b0abc675bff..d2acb28ebfb13 100644 --- a/mlir/unittests/TableGen/StructsGenTest.cpp +++ b/mlir/unittests/TableGen/StructsGenTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" #include "mlir/IR/Identifier.h" #include "mlir/IR/StandardTypes.h" #include "llvm/ADT/DenseMap.h" @@ -34,9 +35,10 @@ static test::TestStruct getTestStruct(mlir::MLIRContext *context) { auto elementsAttr = mlir::DenseIntElementsAttr::get(elementsType, {1, 2, 3, 4, 5, 6}); auto optionalAttr = nullptr; + auto defaultValuedAttr = nullptr; return test::TestStruct::get(integerAttr, floatAttr, elementsAttr, - optionalAttr, context); + optionalAttr, defaultValuedAttr, context); } /// Validates that test::TestStruct::classof correctly identifies a valid @@ -167,4 +169,12 @@ TEST(StructsGenTest, EmptyOptional) { EXPECT_EQ(structAttr.sample_optional_integer(), nullptr); } +TEST(StructsGenTest, GetDefaultValuedAttr) { + mlir::MLIRContext context; + mlir::Builder builder(&context); + auto structAttr = getTestStruct(&context); + EXPECT_EQ(structAttr.sample_default_valued_integer(), + builder.getI32IntegerAttr(42)); +} + } // namespace mlir diff --git a/mlir/unittests/TableGen/structs.td b/mlir/unittests/TableGen/structs.td index cf5e4f5448f00..06a15e181484f 100644 --- a/mlir/unittests/TableGen/structs.td +++ b/mlir/unittests/TableGen/structs.td @@ -17,6 +17,8 @@ def Test_Struct : StructAttr<"TestStruct", Test_Dialect, [ StructFieldAttr<"sample_float", F32Attr>, 
StructFieldAttr<"sample_elements", I32ElementsAttr>, StructFieldAttr<"sample_optional_integer", - OptionalAttr>] > { + OptionalAttr>, + StructFieldAttr<"sample_default_valued_integer", + DefaultValuedAttr>] > { let description = "Structure for test data"; } diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake index 90e0704c4a94c..1d46b141ffdf8 100644 --- a/openmp/cmake/OpenMPTesting.cmake +++ b/openmp/cmake/OpenMPTesting.cmake @@ -50,13 +50,6 @@ endfunction() if (${OPENMP_STANDALONE_BUILD}) find_standalone_test_dependencies() - # Make sure we can use the console pool for recent CMake and Ninja > 1.5. - if (CMAKE_VERSION VERSION_LESS 3.1.20141117) - set(cmake_3_2_USES_TERMINAL) - else() - set(cmake_3_2_USES_TERMINAL USES_TERMINAL) - endif() - # Set lit arguments. set(DEFAULT_LIT_ARGS "-sv --show-unsupported --show-xfail") if (MSVC OR XCODE) @@ -189,7 +182,7 @@ function(add_openmp_testsuite target comment) COMMAND ${PYTHON_EXECUTABLE} ${OPENMP_LLVM_LIT_EXECUTABLE} ${LIT_ARGS} ${ARG_UNPARSED_ARGUMENTS} COMMENT ${comment} DEPENDS ${ARG_DEPENDS} - ${cmake_3_2_USES_TERMINAL} + USES_TERMINAL ) else() if (ARG_EXCLUDE_FROM_CHECK_ALL) diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h index 88807de4e19c7..6d5d6cd19bd6e 100644 --- a/openmp/libomptarget/deviceRTLs/common/omptarget.h +++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h @@ -252,7 +252,7 @@ class omptarget_nvptx_TeamDescr { workDescrForActiveParallel; // one, ONLY for the active par ALIGN(16) - __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE]; + __kmpc_data_sharing_worker_slot_static worker_rootS[DS_Max_Warp_Number]; ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1]; }; diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu index ca2fd1d307542..9b116aba2fc39 100644 --- 
a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu @@ -26,7 +26,7 @@ INLINE static void data_sharing_init_stack_common() { omptarget_nvptx_TeamDescr *teamDescr = &omptarget_nvptx_threadPrivateContext->TeamContext(); - for (int WID = 0; WID < WARPSIZE; WID++) { + for (int WID = 0; WID < DS_Max_Warp_Number; WID++) { __kmpc_data_sharing_slot *RootS = teamDescr->GetPreallocatedSlotAddr(WID); DataSharingState.SlotPtr[WID] = RootS; DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0]; diff --git a/openmp/runtime/cmake/LibompCheckFortranFlag.cmake b/openmp/runtime/cmake/LibompCheckFortranFlag.cmake index 21837ef068e36..b8cdb28a4bf28 100644 --- a/openmp/runtime/cmake/LibompCheckFortranFlag.cmake +++ b/openmp/runtime/cmake/LibompCheckFortranFlag.cmake @@ -19,54 +19,9 @@ function(libomp_check_fortran_flag flag boolean) print *, \"Hello World!\" end program hello") - set(failed_regexes "[Ee]rror;[Uu]nknown;[Ss]kipping") - if(CMAKE_VERSION VERSION_GREATER 3.1 OR CMAKE_VERSION VERSION_EQUAL 3.1) + set(failed_regexes "[Ee]rror;[Uu]nknown;[Ss]kipping") include(CheckFortranSourceCompiles) check_fortran_source_compiles("${fortran_source}" ${boolean} FAIL_REGEX "${failed_regexes}") set(${boolean} ${${boolean}} PARENT_SCOPE) - return() - else() - # Our manual check for cmake versions that don't have CheckFortranSourceCompiles - set(base_dir ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/fortran_flag_check) - file(MAKE_DIRECTORY ${base_dir}) - file(WRITE ${base_dir}/fortran_source.f "${fortran_source}") - - message(STATUS "Performing Test ${boolean}") - execute_process( - COMMAND ${CMAKE_Fortran_COMPILER} "${flag}" ${base_dir}/fortran_source.f - WORKING_DIRECTORY ${base_dir} - RESULT_VARIABLE exit_code - OUTPUT_VARIABLE OUTPUT - ERROR_VARIABLE OUTPUT - ) - - if(${exit_code} EQUAL 0) - foreach(regex IN LISTS failed_regexes) - if("${OUTPUT}" MATCHES ${regex}) - set(retval FALSE) - endif() - endforeach() - 
else() - set(retval FALSE) - endif() - - if(${retval}) - set(${boolean} 1 CACHE INTERNAL "Test ${boolean}") - message(STATUS "Performing Test ${boolean} - Success") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Performing Fortran Compiler Flag test ${boolean} succeeded with the following output:\n" - "${OUTPUT}\n" - "Source file was:\n${fortran_source}\n") - else() - set(${boolean} "" CACHE INTERNAL "Test ${boolean}") - message(STATUS "Performing Test ${boolean} - Failed") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log - "Performing Fortran Compiler Flag test ${boolean} failed with the following output:\n" - "${OUTPUT}\n" - "Source file was:\n${fortran_source}\n") - endif() - endif() - - set(${boolean} ${retval} PARENT_SCOPE) endif() endfunction() diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake index 86de6f10686eb..518a09b45a420 100644 --- a/polly/cmake/polly_macros.cmake +++ b/polly/cmake/polly_macros.cmake @@ -72,21 +72,6 @@ macro(add_polly_loadable_module name) endif() endmacro(add_polly_loadable_module) -# Use C99-compatible compile mode for all C source files of a target. -function(target_enable_c99 _target) - if(CMAKE_VERSION VERSION_GREATER "3.1") - set_target_properties("${_target}" PROPERTIES C_STANDARD 99) - elseif(CMAKE_COMPILER_IS_GNUCC) - get_target_property(_sources "${_target}" SOURCES) - foreach(_file IN LISTS _sources) - get_source_file_property(_lang "${_file}" LANGUAGE) - if(_lang STREQUAL "C") - set_source_files_properties(${_file} COMPILE_FLAGS "-std=gnu99") - endif() - endforeach() - endif() -endfunction() - # Recursive helper for setup_source_group. 
Traverse the file system and add # source files matching the glob_expr to the prefix, recursing into # subdirectories as they are encountered diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt index 1039079cb49ca..c953ea48475d7 100644 --- a/polly/lib/External/CMakeLists.txt +++ b/polly/lib/External/CMakeLists.txt @@ -293,8 +293,7 @@ if (POLLY_BUNDLED_ISL) ) # ISL requires at least C99 to compile. gcc < 5.0 use -std=gnu89 as default. - target_enable_c99(PollyISL) - target_enable_c99(polly-isl-test) + set_property(TARGET PollyISL polly-isl-test PROPERTY C_STANDARD 99) endif (POLLY_BUNDLED_ISL) set(PET_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pet")